BryanW commited on
Commit
a794b88
·
verified ·
1 Parent(s): 43f5fc5

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/__pycache__/__init__.cpython-312.pyc +0 -0
  2. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h +98 -0
  3. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/Openmp/cupti_openmp.h +100 -0
  4. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/Openmp/omp-tools.h +1083 -0
  5. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/__init__.py +0 -0
  6. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/__pycache__/__init__.cpython-312.pyc +0 -0
  7. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cuda_stdint.h +112 -0
  8. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti.h +123 -0
  9. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_activity.h +0 -0
  10. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_activity_deprecated.h +0 -0
  11. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_callbacks.h +863 -0
  12. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_checkpoint.h +127 -0
  13. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_common.h +93 -0
  14. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_driver_cbid.h +799 -0
  15. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_events.h +1349 -0
  16. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_metrics.h +824 -0
  17. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_nvtx_cbid.h +111 -0
  18. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling.h +936 -0
  19. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h +402 -0
  20. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_pmsampling.h +490 -0
  21. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_profiler_host.h +541 -0
  22. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_profiler_target.h +602 -0
  23. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_range_profiler.h +465 -0
  24. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_result.h +360 -0
  25. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h +504 -0
  26. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_sass_metrics.h +436 -0
  27. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_target.h +43 -0
  28. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_version.h +137 -0
  29. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cudaGL_meta.h +116 -0
  30. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cudaVDPAU_meta.h +46 -0
  31. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_gl_interop_meta.h +71 -0
  32. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h +0 -0
  33. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h +2372 -0
  34. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h +38 -0
  35. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cudart_removed_meta.h +162 -0
  36. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_nvtx_meta.h +247 -0
  37. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_common.h +393 -0
  38. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_cuda_host.h +179 -0
  39. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_host.h +1178 -0
  40. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_target.h +626 -0
  41. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/lib/__init__.py +0 -0
  42. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/lib/__pycache__/__init__.cpython-312.pyc +0 -0
  43. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/__pycache__/__init__.cpython-312.pyc +0 -0
  44. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/include/__init__.py +0 -0
  45. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/include/__pycache__/__init__.cpython-312.pyc +0 -0
  46. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h +740 -0
  47. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/lib/__init__.py +0 -0
  48. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/lib/__pycache__/__init__.cpython-312.pyc +0 -0
  49. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/lib/libcufile_rdma.so.1 +0 -0
  50. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/curand/__pycache__/__init__.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (223 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #include <cuda_stdint.h>
51
+
52
+ #if !defined(_CUPTI_OPENACC_H_)
53
+ #define _CUPTI_OPENACC_H_
54
+
55
+ #ifndef CUPTIAPI
56
+ #ifdef _WIN32
57
+ #define CUPTIAPI __stdcall
58
+ #else
59
+ #define CUPTIAPI
60
+ #endif
61
+ #endif
62
+
63
+ #if defined(__LP64__)
64
+ #define CUPTILP64 1
65
+ #elif defined(_WIN64)
66
+ #define CUPTILP64 1
67
+ #else
68
+ #undef CUPTILP64
69
+ #endif
70
+
71
+ #if defined(__cplusplus)
72
+ extern "C" {
73
+ #endif
74
+
75
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
76
+ #pragma GCC visibility push(default)
77
+ #endif
78
+
79
+ /**
80
+ * \brief Initialize OpenACC support
81
+ *
82
+ * \param profRegister function of type acc_prof_reg as obtained from acc_register_library
83
+ * \param profUnregister function of type acc_prof_reg as obtained from acc_register_library
84
+ * \param profLookup function of type acc_prof_lookup as obtained from acc_register_library
85
+ */
86
+ CUptiResult CUPTIAPI
87
+ cuptiOpenACCInitialize(void *profRegister, void *profUnregister, void *profLookup);
88
+
89
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
90
+ #pragma GCC visibility pop
91
+ #endif
92
+
93
+ #if defined(__cplusplus)
94
+ }
95
+ #endif
96
+
97
+ #endif /*_CUPTI_OPENACC_H_*/
98
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/Openmp/cupti_openmp.h ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2018 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #include <cuda_stdint.h>
51
+ #include "Openmp/omp-tools.h"
52
+
53
+ #if !defined(_CUPTI_OPENMP_H_)
54
+ #define _CUPTI_OPENMP_H_
55
+
56
+ #ifndef CUPTIAPI
57
+ #ifdef _WIN32
58
+ #define CUPTIAPI __stdcall
59
+ #else
60
+ #define CUPTIAPI
61
+ #endif
62
+ #endif
63
+
64
+ #if defined(__LP64__)
65
+ #define CUPTILP64 1
66
+ #elif defined(_WIN64)
67
+ #define CUPTILP64 1
68
+ #else
69
+ #undef CUPTILP64
70
+ #endif
71
+
72
+ #if defined(__cplusplus)
73
+ extern "C" {
74
+ #endif
75
+
76
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
77
+ #pragma GCC visibility push(default)
78
+ #endif
79
+
80
+ /**
81
+ * \brief Initialize OPENMP support (deprecated, used before OpenMP 5.0)
82
+ *
83
+ */
84
+ int CUPTIAPI cuptiOpenMpInitialize(ompt_function_lookup_t ompt_fn_lookup, const char *runtime_version, unsigned int ompt_version);
85
+
86
+ /**
87
+ * \brief Initialize OPENMP support
88
+ *
89
+ */
90
+ int CUPTIAPI cuptiOpenMpInitialize_v2(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data);
91
+
92
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
93
+ #pragma GCC visibility pop
94
+ #endif
95
+
96
+ #if defined(__cplusplus)
97
+ }
98
+ #endif
99
+
100
+ #endif /*_CUPTI_OPENMP_H_*/
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/Openmp/omp-tools.h ADDED
@@ -0,0 +1,1083 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * include/50/omp-tools.h.var
3
+ */
4
+
5
+ //===----------------------------------------------------------------------===//
6
+ //
7
+ // The LLVM Compiler Infrastructure
8
+ //
9
+ // This file is dual licensed under the MIT and the University of Illinois Open
10
+ // Source Licenses. See LICENSE.txt for details.
11
+ //
12
+ //===----------------------------------------------------------------------===//
13
+
14
+ #ifndef __OMPT__
15
+ #define __OMPT__
16
+
17
+ /*****************************************************************************
18
+ * system include files
19
+ *****************************************************************************/
20
+
21
+ #include <stdint.h>
22
+ #include <stddef.h>
23
+
24
+ /*****************************************************************************
25
+ * iteration macros
26
+ *****************************************************************************/
27
+
28
+ #define FOREACH_OMPT_INQUIRY_FN(macro) \
29
+ macro (ompt_enumerate_states) \
30
+ macro (ompt_enumerate_mutex_impls) \
31
+ \
32
+ macro (ompt_set_callback) \
33
+ macro (ompt_get_callback) \
34
+ \
35
+ macro (ompt_get_state) \
36
+ \
37
+ macro (ompt_get_parallel_info) \
38
+ macro (ompt_get_task_info) \
39
+ macro (ompt_get_task_memory) \
40
+ macro (ompt_get_thread_data) \
41
+ macro (ompt_get_unique_id) \
42
+ macro (ompt_finalize_tool) \
43
+ \
44
+ macro(ompt_get_num_procs) \
45
+ macro(ompt_get_num_places) \
46
+ macro(ompt_get_place_proc_ids) \
47
+ macro(ompt_get_place_num) \
48
+ macro(ompt_get_partition_place_nums) \
49
+ macro(ompt_get_proc_id) \
50
+ \
51
+ macro(ompt_get_target_info) \
52
+ macro(ompt_get_num_devices)
53
+
54
+ #define FOREACH_OMPT_STATE(macro) \
55
+ \
56
+ /* first available state */ \
57
+ macro (ompt_state_undefined, 0x102) /* undefined thread state */ \
58
+ \
59
+ /* work states (0..15) */ \
60
+ macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \
61
+ macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \
62
+ macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \
63
+ \
64
+ /* barrier wait states (16..31) */ \
65
+ macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \
66
+ macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \
67
+ /* implicit barrier at the end of parallel region */\
68
+ macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \
69
+ /* implicit barrier at the end of worksharing */ \
70
+ macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
71
+ macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
72
+ \
73
+ /* task wait states (32..63) */ \
74
+ macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
75
+ macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \
76
+ \
77
+ /* mutex wait states (64..127) */ \
78
+ macro (ompt_state_wait_mutex, 0x040) \
79
+ macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \
80
+ macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \
81
+ macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \
82
+ macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \
83
+ \
84
+ /* target wait states (128..255) */ \
85
+ macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \
86
+ macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \
87
+ macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \
88
+ \
89
+ /* misc (256..511) */ \
90
+ macro (ompt_state_idle, 0x100) /* waiting for work */ \
91
+ macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \
92
+ \
93
+ /* implementation-specific states (512..) */
94
+
95
+
96
+ #define FOREACH_KMP_MUTEX_IMPL(macro) \
97
+ macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \
98
+ macro (kmp_mutex_impl_spin, 1) /* based on spin */ \
99
+ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \
100
+ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */
101
+
102
+ #define FOREACH_OMPT_EVENT(macro) \
103
+ \
104
+ /*--- Mandatory Events ---*/ \
105
+ macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \
106
+ macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \
107
+ \
108
+ macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \
109
+ macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \
110
+ \
111
+ macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \
112
+ macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \
113
+ macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \
114
+ \
115
+ macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \
116
+ macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \
117
+ macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \
118
+ \
119
+ macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \
120
+ \
121
+ macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \
122
+ macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \
123
+ \
124
+ macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \
125
+ macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \
126
+ \
127
+ /* Optional Events */ \
128
+ macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \
129
+ \
130
+ macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \
131
+ \
132
+ macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \
133
+ macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \
134
+ \
135
+ macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \
136
+ \
137
+ macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \
138
+ \
139
+ macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \
140
+ \
141
+ macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \
142
+ \
143
+ macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \
144
+ macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \
145
+ \
146
+ macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \
147
+ macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \
148
+ \
149
+ macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \
150
+ \
151
+ macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \
152
+ \
153
+ macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \
154
+ \
155
+ macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \
156
+ \
157
+ macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */
158
+
159
+ /*****************************************************************************
160
+ * implementation specific types
161
+ *****************************************************************************/
162
+
163
+ typedef enum kmp_mutex_impl_t {
164
+ #define kmp_mutex_impl_macro(impl, code) impl = code,
165
+ FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)
166
+ #undef kmp_mutex_impl_macro
167
+ } kmp_mutex_impl_t;
168
+
169
+ /*****************************************************************************
170
+ * definitions generated from spec
171
+ *****************************************************************************/
172
+
173
+ typedef enum ompt_callbacks_t {
174
+ ompt_callback_thread_begin = 1,
175
+ ompt_callback_thread_end = 2,
176
+ ompt_callback_parallel_begin = 3,
177
+ ompt_callback_parallel_end = 4,
178
+ ompt_callback_task_create = 5,
179
+ ompt_callback_task_schedule = 6,
180
+ ompt_callback_implicit_task = 7,
181
+ ompt_callback_target = 8,
182
+ ompt_callback_target_data_op = 9,
183
+ ompt_callback_target_submit = 10,
184
+ ompt_callback_control_tool = 11,
185
+ ompt_callback_device_initialize = 12,
186
+ ompt_callback_device_finalize = 13,
187
+ ompt_callback_device_load = 14,
188
+ ompt_callback_device_unload = 15,
189
+ ompt_callback_sync_region_wait = 16,
190
+ ompt_callback_mutex_released = 17,
191
+ ompt_callback_dependences = 18,
192
+ ompt_callback_task_dependence = 19,
193
+ ompt_callback_work = 20,
194
+ ompt_callback_master = 21,
195
+ ompt_callback_target_map = 22,
196
+ ompt_callback_sync_region = 23,
197
+ ompt_callback_lock_init = 24,
198
+ ompt_callback_lock_destroy = 25,
199
+ ompt_callback_mutex_acquire = 26,
200
+ ompt_callback_mutex_acquired = 27,
201
+ ompt_callback_nest_lock = 28,
202
+ ompt_callback_flush = 29,
203
+ ompt_callback_cancel = 30,
204
+ ompt_callback_reduction = 31,
205
+ ompt_callback_dispatch = 32
206
+ } ompt_callbacks_t;
207
+
208
+ typedef enum ompt_record_t {
209
+ ompt_record_ompt = 1,
210
+ ompt_record_native = 2,
211
+ ompt_record_invalid = 3
212
+ } ompt_record_t;
213
+
214
+ typedef enum ompt_record_native_t {
215
+ ompt_record_native_info = 1,
216
+ ompt_record_native_event = 2
217
+ } ompt_record_native_t;
218
+
219
+ typedef enum ompt_set_result_t {
220
+ ompt_set_error = 0,
221
+ ompt_set_never = 1,
222
+ ompt_set_impossible = 2,
223
+ ompt_set_sometimes = 3,
224
+ ompt_set_sometimes_paired = 4,
225
+ ompt_set_always = 5
226
+ } ompt_set_result_t;
227
+
228
+ typedef uint64_t ompt_id_t;
229
+
230
+ typedef uint64_t ompt_device_time_t;
231
+
232
+ typedef uint64_t ompt_buffer_cursor_t;
233
+
234
+ typedef enum ompt_thread_t {
235
+ ompt_thread_initial = 1,
236
+ ompt_thread_worker = 2,
237
+ ompt_thread_other = 3,
238
+ ompt_thread_unknown = 4
239
+ } ompt_thread_t;
240
+
241
+ typedef enum ompt_scope_endpoint_t {
242
+ ompt_scope_begin = 1,
243
+ ompt_scope_end = 2
244
+ } ompt_scope_endpoint_t;
245
+
246
+ typedef enum ompt_dispatch_t {
247
+ ompt_dispatch_iteration = 1,
248
+ ompt_dispatch_section = 2
249
+ } ompt_dispatch_t;
250
+
251
+ typedef enum ompt_sync_region_t {
252
+ ompt_sync_region_barrier = 1,
253
+ ompt_sync_region_barrier_implicit = 2,
254
+ ompt_sync_region_barrier_explicit = 3,
255
+ ompt_sync_region_barrier_implementation = 4,
256
+ ompt_sync_region_taskwait = 5,
257
+ ompt_sync_region_taskgroup = 6,
258
+ ompt_sync_region_reduction = 7
259
+ } ompt_sync_region_t;
260
+
261
+ typedef enum ompt_target_data_op_t {
262
+ ompt_target_data_alloc = 1,
263
+ ompt_target_data_transfer_to_device = 2,
264
+ ompt_target_data_transfer_from_device = 3,
265
+ ompt_target_data_delete = 4,
266
+ ompt_target_data_associate = 5,
267
+ ompt_target_data_disassociate = 6
268
+ } ompt_target_data_op_t;
269
+
270
+ typedef enum ompt_work_t {
271
+ ompt_work_loop = 1,
272
+ ompt_work_sections = 2,
273
+ ompt_work_single_executor = 3,
274
+ ompt_work_single_other = 4,
275
+ ompt_work_workshare = 5,
276
+ ompt_work_distribute = 6,
277
+ ompt_work_taskloop = 7
278
+ } ompt_work_t;
279
+
280
+ typedef enum ompt_mutex_t {
281
+ ompt_mutex_lock = 1,
282
+ ompt_mutex_test_lock = 2,
283
+ ompt_mutex_nest_lock = 3,
284
+ ompt_mutex_test_nest_lock = 4,
285
+ ompt_mutex_critical = 5,
286
+ ompt_mutex_atomic = 6,
287
+ ompt_mutex_ordered = 7
288
+ } ompt_mutex_t;
289
+
290
+ typedef enum ompt_native_mon_flag_t {
291
+ ompt_native_data_motion_explicit = 0x01,
292
+ ompt_native_data_motion_implicit = 0x02,
293
+ ompt_native_kernel_invocation = 0x04,
294
+ ompt_native_kernel_execution = 0x08,
295
+ ompt_native_driver = 0x10,
296
+ ompt_native_runtime = 0x20,
297
+ ompt_native_overhead = 0x40,
298
+ ompt_native_idleness = 0x80
299
+ } ompt_native_mon_flag_t;
300
+
301
+ typedef enum ompt_task_flag_t {
302
+ ompt_task_initial = 0x00000001,
303
+ ompt_task_implicit = 0x00000002,
304
+ ompt_task_explicit = 0x00000004,
305
+ ompt_task_target = 0x00000008,
306
+ ompt_task_undeferred = 0x08000000,
307
+ ompt_task_untied = 0x10000000,
308
+ ompt_task_final = 0x20000000,
309
+ ompt_task_mergeable = 0x40000000,
310
+ ompt_task_merged = 0x80000000
311
+ } ompt_task_flag_t;
312
+
313
+ typedef enum ompt_task_status_t {
314
+ ompt_task_complete = 1,
315
+ ompt_task_yield = 2,
316
+ ompt_task_cancel = 3,
317
+ ompt_task_detach = 4,
318
+ ompt_task_early_fulfill = 5,
319
+ ompt_task_late_fulfill = 6,
320
+ ompt_task_switch = 7
321
+ } ompt_task_status_t;
322
+
323
+ typedef enum ompt_target_t {
324
+ ompt_target = 1,
325
+ ompt_target_enter_data = 2,
326
+ ompt_target_exit_data = 3,
327
+ ompt_target_update = 4
328
+ } ompt_target_t;
329
+
330
+ typedef enum ompt_parallel_flag_t {
331
+ ompt_parallel_invoker_program = 0x00000001,
332
+ ompt_parallel_invoker_runtime = 0x00000002,
333
+ ompt_parallel_league = 0x40000000,
334
+ ompt_parallel_team = 0x80000000
335
+ } ompt_parallel_flag_t;
336
+
337
+ typedef enum ompt_target_map_flag_t {
338
+ ompt_target_map_flag_to = 0x01,
339
+ ompt_target_map_flag_from = 0x02,
340
+ ompt_target_map_flag_alloc = 0x04,
341
+ ompt_target_map_flag_release = 0x08,
342
+ ompt_target_map_flag_delete = 0x10,
343
+ ompt_target_map_flag_implicit = 0x20
344
+ } ompt_target_map_flag_t;
345
+
346
+ typedef enum ompt_dependence_type_t {
347
+ ompt_dependence_type_in = 1,
348
+ ompt_dependence_type_out = 2,
349
+ ompt_dependence_type_inout = 3,
350
+ ompt_dependence_type_mutexinoutset = 4,
351
+ ompt_dependence_type_source = 5,
352
+ ompt_dependence_type_sink = 6
353
+ } ompt_dependence_type_t;
354
+
355
+ typedef enum ompt_cancel_flag_t {
356
+ ompt_cancel_parallel = 0x01,
357
+ ompt_cancel_sections = 0x02,
358
+ ompt_cancel_loop = 0x04,
359
+ ompt_cancel_taskgroup = 0x08,
360
+ ompt_cancel_activated = 0x10,
361
+ ompt_cancel_detected = 0x20,
362
+ ompt_cancel_discarded_task = 0x40
363
+ } ompt_cancel_flag_t;
364
+
365
+ typedef uint64_t ompt_hwid_t;
366
+
367
+ typedef uint64_t ompt_wait_id_t;
368
+
369
+ typedef enum ompt_frame_flag_t {
370
+ ompt_frame_runtime = 0x00,
371
+ ompt_frame_application = 0x01,
372
+ ompt_frame_cfa = 0x10,
373
+ ompt_frame_framepointer = 0x20,
374
+ ompt_frame_stackaddress = 0x30
375
+ } ompt_frame_flag_t;
376
+
377
+ typedef enum ompt_state_t {
378
+ ompt_state_work_serial = 0x000,
379
+ ompt_state_work_parallel = 0x001,
380
+ ompt_state_work_reduction = 0x002,
381
+
382
+ ompt_state_wait_barrier = 0x010,
383
+ ompt_state_wait_barrier_implicit_parallel = 0x011,
384
+ ompt_state_wait_barrier_implicit_workshare = 0x012,
385
+ ompt_state_wait_barrier_implicit = 0x013,
386
+ ompt_state_wait_barrier_explicit = 0x014,
387
+
388
+ ompt_state_wait_taskwait = 0x020,
389
+ ompt_state_wait_taskgroup = 0x021,
390
+
391
+ ompt_state_wait_mutex = 0x040,
392
+ ompt_state_wait_lock = 0x041,
393
+ ompt_state_wait_critical = 0x042,
394
+ ompt_state_wait_atomic = 0x043,
395
+ ompt_state_wait_ordered = 0x044,
396
+
397
+ ompt_state_wait_target = 0x080,
398
+ ompt_state_wait_target_map = 0x081,
399
+ ompt_state_wait_target_update = 0x082,
400
+
401
+ ompt_state_idle = 0x100,
402
+ ompt_state_overhead = 0x101,
403
+ ompt_state_undefined = 0x102
404
+ } ompt_state_t;
405
+
406
+ typedef uint64_t (*ompt_get_unique_id_t) (void);
407
+
408
+ typedef uint64_t ompd_size_t;
409
+
410
+ typedef uint64_t ompd_wait_id_t;
411
+
412
+ typedef uint64_t ompd_addr_t;
413
+ typedef int64_t ompd_word_t;
414
+ typedef uint64_t ompd_seg_t;
415
+
416
+ typedef uint64_t ompd_device_t;
417
+
418
+ typedef uint64_t ompd_thread_id_t;
419
+
420
+ typedef enum ompd_scope_t {
421
+ ompd_scope_global = 1,
422
+ ompd_scope_address_space = 2,
423
+ ompd_scope_thread = 3,
424
+ ompd_scope_parallel = 4,
425
+ ompd_scope_implicit_task = 5,
426
+ ompd_scope_task = 6
427
+ } ompd_scope_t;
428
+
429
+ typedef uint64_t ompd_icv_id_t;
430
+
431
+ typedef enum ompd_rc_t {
432
+ ompd_rc_ok = 0,
433
+ ompd_rc_unavailable = 1,
434
+ ompd_rc_stale_handle = 2,
435
+ ompd_rc_bad_input = 3,
436
+ ompd_rc_error = 4,
437
+ ompd_rc_unsupported = 5,
438
+ ompd_rc_needs_state_tracking = 6,
439
+ ompd_rc_incompatible = 7,
440
+ ompd_rc_device_read_error = 8,
441
+ ompd_rc_device_write_error = 9,
442
+ ompd_rc_nomem = 10,
443
+ } ompd_rc_t;
444
+
445
+ typedef void (*ompt_interface_fn_t) (void);
446
+
447
+ typedef ompt_interface_fn_t (*ompt_function_lookup_t) (
448
+ const char *interface_function_name
449
+ );
450
+
451
+ typedef union ompt_data_t { /* Union overlaying a 64-bit unsigned integer and a void pointer; both members share the same storage, so writing one overwrites the other. */
452
+ uint64_t value; /* integer view of the storage */
453
+ void *ptr; /* pointer view of the same storage */
454
+ } ompt_data_t;
455
+
456
+ typedef struct ompt_frame_t {
457
+ ompt_data_t exit_frame;
458
+ ompt_data_t enter_frame;
459
+ int exit_frame_flags;
460
+ int enter_frame_flags;
461
+ } ompt_frame_t;
462
+
463
+ typedef void (*ompt_callback_t) (void);
464
+
465
+ typedef void ompt_device_t;
466
+
467
+ typedef void ompt_buffer_t;
468
+
469
+ typedef void (*ompt_callback_buffer_request_t) (
470
+ int device_num,
471
+ ompt_buffer_t **buffer,
472
+ size_t *bytes
473
+ );
474
+
475
+ typedef void (*ompt_callback_buffer_complete_t) (
476
+ int device_num,
477
+ ompt_buffer_t *buffer,
478
+ size_t bytes,
479
+ ompt_buffer_cursor_t begin,
480
+ int buffer_owned
481
+ );
482
+
483
+ typedef void (*ompt_finalize_t) (
484
+ ompt_data_t *tool_data
485
+ );
486
+
487
+ typedef int (*ompt_initialize_t) (
488
+ ompt_function_lookup_t lookup,
489
+ int initial_device_num,
490
+ ompt_data_t *tool_data
491
+ );
492
+
493
+ typedef struct ompt_start_tool_result_t {
494
+ ompt_initialize_t initialize;
495
+ ompt_finalize_t finalize;
496
+ ompt_data_t tool_data;
497
+ } ompt_start_tool_result_t;
498
+
499
+ typedef struct ompt_record_abstract_t {
500
+ ompt_record_native_t rclass;
501
+ const char *type;
502
+ ompt_device_time_t start_time;
503
+ ompt_device_time_t end_time;
504
+ ompt_hwid_t hwid;
505
+ } ompt_record_abstract_t;
506
+
507
+ typedef struct ompt_dependence_t {
508
+ ompt_data_t variable;
509
+ ompt_dependence_type_t dependence_type;
510
+ } ompt_dependence_t;
511
+
512
+ typedef int (*ompt_enumerate_states_t) (
513
+ int current_state,
514
+ int *next_state,
515
+ const char **next_state_name
516
+ );
517
+
518
+ typedef int (*ompt_enumerate_mutex_impls_t) (
519
+ int current_impl,
520
+ int *next_impl,
521
+ const char **next_impl_name
522
+ );
523
+
524
+ typedef ompt_set_result_t (*ompt_set_callback_t) (
525
+ ompt_callbacks_t event,
526
+ ompt_callback_t callback
527
+ );
528
+
529
+ typedef int (*ompt_get_callback_t) (
530
+ ompt_callbacks_t event,
531
+ ompt_callback_t *callback
532
+ );
533
+
534
+ typedef ompt_data_t *(*ompt_get_thread_data_t) (void);
535
+
536
+ typedef int (*ompt_get_num_procs_t) (void);
537
+
538
+ typedef int (*ompt_get_num_places_t) (void);
539
+
540
+ typedef int (*ompt_get_place_proc_ids_t) (
541
+ int place_num,
542
+ int ids_size,
543
+ int *ids
544
+ );
545
+
546
+ typedef int (*ompt_get_place_num_t) (void);
547
+
548
+ typedef int (*ompt_get_partition_place_nums_t) (
549
+ int place_nums_size,
550
+ int *place_nums
551
+ );
552
+
553
+ typedef int (*ompt_get_proc_id_t) (void);
554
+
555
+ typedef int (*ompt_get_state_t) (
556
+ ompt_wait_id_t *wait_id
557
+ );
558
+
559
+ typedef int (*ompt_get_parallel_info_t) (
560
+ int ancestor_level,
561
+ ompt_data_t **parallel_data,
562
+ int *team_size
563
+ );
564
+
565
+ typedef int (*ompt_get_task_info_t) (
566
+ int ancestor_level,
567
+ int *flags,
568
+ ompt_data_t **task_data,
569
+ ompt_frame_t **task_frame,
570
+ ompt_data_t **parallel_data,
571
+ int *thread_num
572
+ );
573
+
574
+ typedef int (*ompt_get_task_memory_t)(
575
+ void **addr,
576
+ size_t *size,
577
+ int block
578
+ );
579
+
580
+ typedef int (*ompt_get_target_info_t) (
581
+ uint64_t *device_num,
582
+ ompt_id_t *target_id,
583
+ ompt_id_t *host_op_id
584
+ );
585
+
586
+ typedef int (*ompt_get_num_devices_t) (void);
587
+
588
+ typedef void (*ompt_finalize_tool_t) (void);
589
+
590
+ typedef int (*ompt_get_device_num_procs_t) (
591
+ ompt_device_t *device
592
+ );
593
+
594
+ typedef ompt_device_time_t (*ompt_get_device_time_t) (
595
+ ompt_device_t *device
596
+ );
597
+
598
+ typedef double (*ompt_translate_time_t) (
599
+ ompt_device_t *device,
600
+ ompt_device_time_t time
601
+ );
602
+
603
+ typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (
604
+ ompt_device_t *device,
605
+ unsigned int enable,
606
+ unsigned int etype
607
+ );
608
+
609
+ typedef ompt_set_result_t (*ompt_set_trace_native_t) (
610
+ ompt_device_t *device,
611
+ int enable,
612
+ int flags
613
+ );
614
+
615
+ typedef int (*ompt_start_trace_t) (
616
+ ompt_device_t *device,
617
+ ompt_callback_buffer_request_t request,
618
+ ompt_callback_buffer_complete_t complete
619
+ );
620
+
621
+ typedef int (*ompt_pause_trace_t) (
622
+ ompt_device_t *device,
623
+ int begin_pause
624
+ );
625
+
626
+ typedef int (*ompt_flush_trace_t) (
627
+ ompt_device_t *device
628
+ );
629
+
630
+ typedef int (*ompt_stop_trace_t) (
631
+ ompt_device_t *device
632
+ );
633
+
634
+ typedef int (*ompt_advance_buffer_cursor_t) (
635
+ ompt_device_t *device,
636
+ ompt_buffer_t *buffer,
637
+ size_t size,
638
+ ompt_buffer_cursor_t current,
639
+ ompt_buffer_cursor_t *next
640
+ );
641
+
642
+ typedef ompt_record_t (*ompt_get_record_type_t) (
643
+ ompt_buffer_t *buffer,
644
+ ompt_buffer_cursor_t current
645
+ );
646
+
647
+ typedef void *(*ompt_get_record_native_t) (
648
+ ompt_buffer_t *buffer,
649
+ ompt_buffer_cursor_t current,
650
+ ompt_id_t *host_op_id
651
+ );
652
+
653
+ typedef ompt_record_abstract_t *
654
+ (*ompt_get_record_abstract_t) (
655
+ void *native_record
656
+ );
657
+
658
+ typedef void (*ompt_callback_thread_begin_t) (
659
+ ompt_thread_t thread_type,
660
+ ompt_data_t *thread_data
661
+ );
662
+
663
+ typedef struct ompt_record_thread_begin_t {
664
+ ompt_thread_t thread_type;
665
+ } ompt_record_thread_begin_t;
666
+
667
+ typedef void (*ompt_callback_thread_end_t) (
668
+ ompt_data_t *thread_data
669
+ );
670
+
671
+ typedef void (*ompt_callback_parallel_begin_t) (
672
+ ompt_data_t *encountering_task_data,
673
+ const ompt_frame_t *encountering_task_frame,
674
+ ompt_data_t *parallel_data,
675
+ unsigned int requested_parallelism,
676
+ int flags,
677
+ const void *codeptr_ra
678
+ );
679
+
680
+ typedef struct ompt_record_parallel_begin_t {
681
+ ompt_id_t encountering_task_id;
682
+ ompt_id_t parallel_id;
683
+ unsigned int requested_parallelism;
684
+ int flags;
685
+ const void *codeptr_ra;
686
+ } ompt_record_parallel_begin_t;
687
+
688
+ typedef void (*ompt_callback_parallel_end_t) (
689
+ ompt_data_t *parallel_data,
690
+ ompt_data_t *encountering_task_data,
691
+ int flags,
692
+ const void *codeptr_ra
693
+ );
694
+
695
+ typedef struct ompt_record_parallel_end_t {
696
+ ompt_id_t parallel_id;
697
+ ompt_id_t encountering_task_id;
698
+ int flags;
699
+ const void *codeptr_ra;
700
+ } ompt_record_parallel_end_t;
701
+
702
+ typedef void (*ompt_callback_work_t) (
703
+ ompt_work_t wstype,
704
+ ompt_scope_endpoint_t endpoint,
705
+ ompt_data_t *parallel_data,
706
+ ompt_data_t *task_data,
707
+ uint64_t count,
708
+ const void *codeptr_ra
709
+ );
710
+
711
+ typedef struct ompt_record_work_t {
712
+ ompt_work_t wstype;
713
+ ompt_scope_endpoint_t endpoint;
714
+ ompt_id_t parallel_id;
715
+ ompt_id_t task_id;
716
+ uint64_t count;
717
+ const void *codeptr_ra;
718
+ } ompt_record_work_t;
719
+
720
+ typedef void (*ompt_callback_dispatch_t) (
721
+ ompt_data_t *parallel_data,
722
+ ompt_data_t *task_data,
723
+ ompt_dispatch_t kind,
724
+ ompt_data_t instance
725
+ );
726
+
727
+ typedef struct ompt_record_dispatch_t {
728
+ ompt_id_t parallel_id;
729
+ ompt_id_t task_id;
730
+ ompt_dispatch_t kind;
731
+ ompt_data_t instance;
732
+ } ompt_record_dispatch_t;
733
+
734
+ typedef void (*ompt_callback_task_create_t) (
735
+ ompt_data_t *encountering_task_data,
736
+ const ompt_frame_t *encountering_task_frame,
737
+ ompt_data_t *new_task_data,
738
+ int flags,
739
+ int has_dependences,
740
+ const void *codeptr_ra
741
+ );
742
+
743
+ typedef struct ompt_record_task_create_t {
744
+ ompt_id_t encountering_task_id;
745
+ ompt_id_t new_task_id;
746
+ int flags;
747
+ int has_dependences;
748
+ const void *codeptr_ra;
749
+ } ompt_record_task_create_t;
750
+
751
+ typedef void (*ompt_callback_dependences_t) (
752
+ ompt_data_t *task_data,
753
+ const ompt_dependence_t *deps,
754
+ int ndeps
755
+ );
756
+
757
+ typedef struct ompt_record_dependences_t {
758
+ ompt_id_t task_id;
759
+ ompt_dependence_t dep;
760
+ int ndeps;
761
+ } ompt_record_dependences_t;
762
+
763
+ typedef void (*ompt_callback_task_dependence_t) (
764
+ ompt_data_t *src_task_data,
765
+ ompt_data_t *sink_task_data
766
+ );
767
+
768
+ typedef struct ompt_record_task_dependence_t {
769
+ ompt_id_t src_task_id;
770
+ ompt_id_t sink_task_id;
771
+ } ompt_record_task_dependence_t;
772
+
773
+ typedef void (*ompt_callback_task_schedule_t) (
774
+ ompt_data_t *prior_task_data,
775
+ ompt_task_status_t prior_task_status,
776
+ ompt_data_t *next_task_data
777
+ );
778
+
779
+ typedef struct ompt_record_task_schedule_t {
780
+ ompt_id_t prior_task_id;
781
+ ompt_task_status_t prior_task_status;
782
+ ompt_id_t next_task_id;
783
+ } ompt_record_task_schedule_t;
784
+
785
+ typedef void (*ompt_callback_implicit_task_t) (
786
+ ompt_scope_endpoint_t endpoint,
787
+ ompt_data_t *parallel_data,
788
+ ompt_data_t *task_data,
789
+ unsigned int actual_parallelism,
790
+ unsigned int index,
791
+ int flags
792
+ );
793
+
794
+ typedef struct ompt_record_implicit_task_t {
795
+ ompt_scope_endpoint_t endpoint;
796
+ ompt_id_t parallel_id;
797
+ ompt_id_t task_id;
798
+ unsigned int actual_parallelism;
799
+ unsigned int index;
800
+ int flags;
801
+ } ompt_record_implicit_task_t;
802
+
803
+ typedef void (*ompt_callback_master_t) (
804
+ ompt_scope_endpoint_t endpoint,
805
+ ompt_data_t *parallel_data,
806
+ ompt_data_t *task_data,
807
+ const void *codeptr_ra
808
+ );
809
+
810
+ typedef struct ompt_record_master_t {
811
+ ompt_scope_endpoint_t endpoint;
812
+ ompt_id_t parallel_id;
813
+ ompt_id_t task_id;
814
+ const void *codeptr_ra;
815
+ } ompt_record_master_t;
816
+
817
+ typedef void (*ompt_callback_sync_region_t) (
818
+ ompt_sync_region_t kind,
819
+ ompt_scope_endpoint_t endpoint,
820
+ ompt_data_t *parallel_data,
821
+ ompt_data_t *task_data,
822
+ const void *codeptr_ra
823
+ );
824
+
825
+ typedef struct ompt_record_sync_region_t {
826
+ ompt_sync_region_t kind;
827
+ ompt_scope_endpoint_t endpoint;
828
+ ompt_id_t parallel_id;
829
+ ompt_id_t task_id;
830
+ const void *codeptr_ra;
831
+ } ompt_record_sync_region_t;
832
+
833
+ typedef void (*ompt_callback_mutex_acquire_t) (
834
+ ompt_mutex_t kind,
835
+ unsigned int hint,
836
+ unsigned int impl,
837
+ ompt_wait_id_t wait_id,
838
+ const void *codeptr_ra
839
+ );
840
+
841
+ typedef struct ompt_record_mutex_acquire_t {
842
+ ompt_mutex_t kind;
843
+ unsigned int hint;
844
+ unsigned int impl;
845
+ ompt_wait_id_t wait_id;
846
+ const void *codeptr_ra;
847
+ } ompt_record_mutex_acquire_t;
848
+
849
+ typedef void (*ompt_callback_mutex_t) (
850
+ ompt_mutex_t kind,
851
+ ompt_wait_id_t wait_id,
852
+ const void *codeptr_ra
853
+ );
854
+
855
+ typedef struct ompt_record_mutex_t {
856
+ ompt_mutex_t kind;
857
+ ompt_wait_id_t wait_id;
858
+ const void *codeptr_ra;
859
+ } ompt_record_mutex_t;
860
+
861
+ typedef void (*ompt_callback_nest_lock_t) (
862
+ ompt_scope_endpoint_t endpoint,
863
+ ompt_wait_id_t wait_id,
864
+ const void *codeptr_ra
865
+ );
866
+
867
+ typedef struct ompt_record_nest_lock_t {
868
+ ompt_scope_endpoint_t endpoint;
869
+ ompt_wait_id_t wait_id;
870
+ const void *codeptr_ra;
871
+ } ompt_record_nest_lock_t;
872
+
873
+ typedef void (*ompt_callback_flush_t) (
874
+ ompt_data_t *thread_data,
875
+ const void *codeptr_ra
876
+ );
877
+
878
+ typedef struct ompt_record_flush_t {
879
+ const void *codeptr_ra;
880
+ } ompt_record_flush_t;
881
+
882
+ typedef void (*ompt_callback_cancel_t) (
883
+ ompt_data_t *task_data,
884
+ int flags,
885
+ const void *codeptr_ra
886
+ );
887
+
888
+ typedef struct ompt_record_cancel_t {
889
+ ompt_id_t task_id;
890
+ int flags;
891
+ const void *codeptr_ra;
892
+ } ompt_record_cancel_t;
893
+
894
+ typedef void (*ompt_callback_device_initialize_t) (
895
+ int device_num,
896
+ const char *type,
897
+ ompt_device_t *device,
898
+ ompt_function_lookup_t lookup,
899
+ const char *documentation
900
+ );
901
+
902
+ typedef void (*ompt_callback_device_finalize_t) (
903
+ int device_num
904
+ );
905
+
906
+ typedef void (*ompt_callback_device_load_t) (
907
+ int device_num,
908
+ const char *filename,
909
+ int64_t offset_in_file,
910
+ void *vma_in_file,
911
+ size_t bytes,
912
+ void *host_addr,
913
+ void *device_addr,
914
+ uint64_t module_id
915
+ );
916
+
917
+ typedef void (*ompt_callback_device_unload_t) (
918
+ int device_num,
919
+ uint64_t module_id
920
+ );
921
+
922
+ typedef void (*ompt_callback_target_data_op_t) (
923
+ ompt_id_t target_id,
924
+ ompt_id_t host_op_id,
925
+ ompt_target_data_op_t optype,
926
+ void *src_addr,
927
+ int src_device_num,
928
+ void *dest_addr,
929
+ int dest_device_num,
930
+ size_t bytes,
931
+ const void *codeptr_ra
932
+ );
933
+
934
+ typedef struct ompt_record_target_data_op_t {
935
+ ompt_id_t host_op_id;
936
+ ompt_target_data_op_t optype;
937
+ void *src_addr;
938
+ int src_device_num;
939
+ void *dest_addr;
940
+ int dest_device_num;
941
+ size_t bytes;
942
+ ompt_device_time_t end_time;
943
+ const void *codeptr_ra;
944
+ } ompt_record_target_data_op_t;
945
+
946
+ typedef void (*ompt_callback_target_t) (
947
+ ompt_target_t kind,
948
+ ompt_scope_endpoint_t endpoint,
949
+ int device_num,
950
+ ompt_data_t *task_data,
951
+ ompt_id_t target_id,
952
+ const void *codeptr_ra
953
+ );
954
+
955
+ typedef struct ompt_record_target_t {
956
+ ompt_target_t kind;
957
+ ompt_scope_endpoint_t endpoint;
958
+ int device_num;
959
+ ompt_id_t task_id;
960
+ ompt_id_t target_id;
961
+ const void *codeptr_ra;
962
+ } ompt_record_target_t;
963
+
964
+ typedef void (*ompt_callback_target_map_t) (
965
+ ompt_id_t target_id,
966
+ unsigned int nitems,
967
+ void **host_addr,
968
+ void **device_addr,
969
+ size_t *bytes,
970
+ unsigned int *mapping_flags,
971
+ const void *codeptr_ra
972
+ );
973
+
974
+ typedef struct ompt_record_target_map_t {
975
+ ompt_id_t target_id;
976
+ unsigned int nitems;
977
+ void **host_addr;
978
+ void **device_addr;
979
+ size_t *bytes;
980
+ unsigned int *mapping_flags;
981
+ const void *codeptr_ra;
982
+ } ompt_record_target_map_t;
983
+
984
+ typedef void (*ompt_callback_target_submit_t) (
985
+ ompt_id_t target_id,
986
+ ompt_id_t host_op_id,
987
+ unsigned int requested_num_teams
988
+ );
989
+
990
+ typedef struct ompt_record_target_kernel_t {
991
+ ompt_id_t host_op_id;
992
+ unsigned int requested_num_teams;
993
+ unsigned int granted_num_teams;
994
+ ompt_device_time_t end_time;
995
+ } ompt_record_target_kernel_t;
996
+
997
+ typedef int (*ompt_callback_control_tool_t) (
998
+ uint64_t command,
999
+ uint64_t modifier,
1000
+ void *arg,
1001
+ const void *codeptr_ra
1002
+ );
1003
+
1004
+ typedef struct ompt_record_control_tool_t {
1005
+ uint64_t command;
1006
+ uint64_t modifier;
1007
+ const void *codeptr_ra;
1008
+ } ompt_record_control_tool_t;
1009
+
1010
+ typedef struct ompd_address_t {
1011
+ ompd_seg_t segment;
1012
+ ompd_addr_t address;
1013
+ } ompd_address_t;
1014
+
1015
+ typedef struct ompd_frame_info_t {
1016
+ ompd_address_t frame_address;
1017
+ ompd_word_t frame_flag;
1018
+ } ompd_frame_info_t;
1019
+
1020
+ typedef struct _ompd_aspace_handle ompd_address_space_handle_t;
1021
+ typedef struct _ompd_thread_handle ompd_thread_handle_t;
1022
+ typedef struct _ompd_parallel_handle ompd_parallel_handle_t;
1023
+ typedef struct _ompd_task_handle ompd_task_handle_t;
1024
+
1025
+ typedef struct _ompd_aspace_cont ompd_address_space_context_t;
1026
+ typedef struct _ompd_thread_cont ompd_thread_context_t;
1027
+
1028
+ typedef struct ompd_device_type_sizes_t {
1029
+ uint8_t sizeof_char;
1030
+ uint8_t sizeof_short;
1031
+ uint8_t sizeof_int;
1032
+ uint8_t sizeof_long;
1033
+ uint8_t sizeof_long_long;
1034
+ uint8_t sizeof_pointer;
1035
+ } ompd_device_type_sizes_t;
1036
+
1037
+ typedef struct ompt_record_ompt_t { /* Trace record: four common header fields followed by a union holding one of the ompt_record_*_t payload structs. */
1038
+ ompt_callbacks_t type; /* presumably identifies which member of `record` is valid - confirm against the OpenMP spec */
1039
+ ompt_device_time_t time;
1040
+ ompt_id_t thread_id;
1041
+ ompt_id_t target_id;
1042
+ union { /* all payload members share storage; the union is as large as its largest member */
1043
+ ompt_record_thread_begin_t thread_begin;
1044
+ ompt_record_parallel_begin_t parallel_begin;
1045
+ ompt_record_parallel_end_t parallel_end;
1046
+ ompt_record_work_t work;
1047
+ ompt_record_dispatch_t dispatch;
1048
+ ompt_record_task_create_t task_create;
1049
+ ompt_record_dependences_t dependences;
1050
+ ompt_record_task_dependence_t task_dependence;
1051
+ ompt_record_task_schedule_t task_schedule;
1052
+ ompt_record_implicit_task_t implicit_task;
1053
+ ompt_record_master_t master;
1054
+ ompt_record_sync_region_t sync_region;
1055
+ ompt_record_mutex_acquire_t mutex_acquire;
1056
+ ompt_record_mutex_t mutex;
1057
+ ompt_record_nest_lock_t nest_lock;
1058
+ ompt_record_flush_t flush;
1059
+ ompt_record_cancel_t cancel;
1060
+ ompt_record_target_t target;
1061
+ ompt_record_target_data_op_t target_data_op;
1062
+ ompt_record_target_map_t target_map;
1063
+ ompt_record_target_kernel_t target_kernel;
1064
+ ompt_record_control_tool_t control_tool;
1065
+ } record;
1066
+ } ompt_record_ompt_t;
1067
+
1068
+ typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (
1069
+ ompt_buffer_t *buffer,
1070
+ ompt_buffer_cursor_t current
1071
+ );
1072
+
1073
+ #define ompt_id_none 0 /* "no value" constants for the OMPT types declared in this header */
1074
+ #define ompt_data_none {0} /* brace initializer, not an expression: usable only where an initializer list is accepted */
1075
+ #define ompt_time_none 0
1076
+ #define ompt_hwid_none 0
1077
+ #define ompt_addr_none ~0 /* int expression with every bit set; NOTE(review): expansion is not parenthesized */
1078
+ #define ompt_mutex_impl_none 0
1079
+ #define ompt_wait_id_none 0
1080
+
1081
+ #define ompd_segment_none 0
1082
+
1083
+ #endif /* __OMPT__ */
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (231 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cuda_stdint.h ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ * * Redistributions of source code must retain the above copyright
8
+ * notice, this list of conditions and the following disclaimer.
9
+ * * Redistributions in binary form must reproduce the above copyright
10
+ * notice, this list of conditions and the following disclaimer in the
11
+ * documentation and/or other materials provided with the distribution.
12
+ * * Neither the name of NVIDIA CORPORATION nor the names of its
13
+ * contributors may be used to endorse or promote products derived
14
+ * from this software without specific prior written permission.
15
+ *
16
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+ */
28
+
29
+ #ifndef __cuda_stdint_h__
30
+ #define __cuda_stdint_h__
31
+
32
+ // Compiler-specific treatment for C99's stdint.h
33
+ //
34
+ // By default, this header will use the standard headers (so it
35
+ // is your responsibility to make sure they are available), except
36
+ // on MSVC before Visual Studio 2010, when they were not provided.
37
+ // To support old MSVC, a few of the commonly-used definitions are
38
+ // provided here. If more definitions are needed, add them here,
39
+ // or replace these definitions with a complete implementation,
40
+ // such as the ones available from Google, Boost, or MSVC10. You
41
+ // can prevent the definition of any of these types (in order to
42
+ // use your own) by #defining CU_STDINT_TYPES_ALREADY_DEFINED.
43
+
44
+ #if !defined(CU_STDINT_TYPES_ALREADY_DEFINED)
45
+
46
+ // In VS including stdint.h forces the C++ runtime dep - provide an opt-out
47
+ // (CU_STDINT_VS_FORCE_NO_STDINT_H) for users that care (notably static
48
+ // cudart).
49
+ #if defined(_MSC_VER) && ((_MSC_VER < 1600) || defined(CU_STDINT_VS_FORCE_NO_STDINT_H))
50
+
51
+ // These definitions can be used with MSVC 8 and 9,
52
+ // which don't ship with stdint.h:
53
+
54
+ typedef unsigned char uint8_t;
55
+
56
+ typedef short int16_t;
57
+ typedef unsigned short uint16_t;
58
+
59
+ // To keep it consistent with all MSVC builds, define those types
60
+ // in the exact same way they are defined with the MSVC headers.
61
+ #if defined(_MSC_VER) /* always true here: the enclosing #if already requires defined(_MSC_VER) */
62
+ typedef signed char int8_t;
63
+
64
+ typedef int int32_t;
65
+ typedef unsigned int uint32_t;
66
+
67
+ typedef long long int64_t;
68
+ typedef unsigned long long uint64_t;
69
+ #else /* never compiled: this branch would need _MSC_VER to be undefined, which the enclosing condition rules out */
70
+ typedef char int8_t;
71
+
72
+ typedef long int32_t;
73
+ typedef unsigned long uint32_t;
74
+
75
+ typedef __int64 int64_t;
76
+ typedef unsigned __int64 uint64_t;
77
+ #endif
78
+
79
+ #elif defined(__DJGPP__)
80
+
81
+ // These definitions can be used when compiling
82
+ // C code with DJGPP, which only provides stdint.h
83
+ // when compiling C++ code with TR1 enabled.
84
+
85
+ typedef char int8_t;
86
+ typedef unsigned char uint8_t;
87
+
88
+ typedef short int16_t;
89
+ typedef unsigned short uint16_t;
90
+
91
+ typedef long int32_t;
92
+ typedef unsigned long uint32_t;
93
+
94
+ typedef long long int64_t;
95
+ typedef unsigned long long uint64_t;
96
+
97
+ #else
98
+
99
+ // Use standard headers, as specified by C99 and C++ TR1.
100
+ // Known to be provided by:
101
+ // - gcc/glibc, supported by all versions of glibc
102
+ // - djgpp, supported since 2001
103
+ // - MSVC, supported by Visual Studio 2010 and later
104
+
105
+ #include <stdint.h>
106
+
107
+ #endif
108
+
109
+ #endif // !defined(CU_STDINT_TYPES_ALREADY_DEFINED)
110
+
111
+
112
+ #endif // file guard
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti.h ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_H_)
51
+ #define _CUPTI_H_
52
+
53
+ #ifdef _WIN32 /* Windows builds only: include <windows.h> with WIN32_LEAN_AND_MEAN and NOMINMAX in effect */
54
+ #ifndef WIN32_LEAN_AND_MEAN
55
+ #define WIN32_LEAN_AND_MEAN /* stays defined for the rest of the translation unit (never #undef'd here) */
56
+ #endif
57
+ #ifdef NOMINMAX /* the includer already set NOMINMAX: include as-is and leave it defined */
58
+ #include <windows.h>
59
+ #else /* otherwise define NOMINMAX only for the duration of the include */
60
+ #define NOMINMAX
61
+ #include <windows.h>
62
+ #undef NOMINMAX /* restore the includer's original (undefined) state */
63
+ #endif
64
+ #endif
65
+
66
+ #include <cuda.h>
67
+ #include <cupti_result.h>
68
+ #include <cupti_version.h>
69
+
70
+ /* Activity, callback, event and metric APIs */
71
+ #include <cupti_activity.h>
72
+ #include <cupti_callbacks.h>
73
+ #include <cupti_events.h>
74
+ #include <cupti_metrics.h>
75
+
76
+ /* Runtime, driver, and nvtx function identifiers */
77
+ #include <cupti_driver_cbid.h>
78
+ #include <cupti_runtime_cbid.h>
79
+ #include <cupti_nvtx_cbid.h>
80
+
81
+ /* To support function parameter structures for obsoleted API. See
82
+ cuda.h for the actual definition of these structures. */
83
+ typedef unsigned int CUdeviceptr_v1; /* v1 device pointer declared as a plain unsigned int */
84
+ typedef struct CUDA_MEMCPY2D_v1_st { int dummy; } CUDA_MEMCPY2D_v1; /* placeholder: single dummy int member, no real layout */
85
+ typedef struct CUDA_MEMCPY3D_v1_st { int dummy; } CUDA_MEMCPY3D_v1; /* placeholder: single dummy int member */
86
+ typedef struct CUDA_ARRAY_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY_DESCRIPTOR_v1; /* placeholder: single dummy int member */
87
+ typedef struct CUDA_ARRAY3D_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY3D_DESCRIPTOR_v1; /* placeholder: single dummy int member */
88
+
89
+ /* Function parameter structures */
90
+ #include <generated_cuda_runtime_api_meta.h>
91
+ #include <generated_cuda_meta.h>
92
+
93
+ /* The following parameter structures cannot be included unless a
94
+ header that defines GL_VERSION is included before including them.
95
+ If these are needed then make sure such a header is included
96
+ already. */
97
+ #ifdef GL_VERSION
98
+ #include <generated_cuda_gl_interop_meta.h>
99
+ #include <generated_cudaGL_meta.h>
100
+ #endif
101
+
102
+ //#include <generated_nvtx_meta.h>
103
+
104
+ /* The following parameter structures cannot be included by default as
105
+ they are not guaranteed to be available on all systems. Uncomment
106
+ the includes that are available, or use the include explicitly. */
107
+ #if defined(__linux__)
108
+ //#include <generated_cuda_vdpau_interop_meta.h>
109
+ //#include <generated_cudaVDPAU_meta.h>
110
+ #endif
111
+
112
+ #ifdef _WIN32
113
+ //#include <generated_cuda_d3d9_interop_meta.h>
114
+ //#include <generated_cuda_d3d10_interop_meta.h>
115
+ //#include <generated_cuda_d3d11_interop_meta.h>
116
+ //#include <generated_cudaD3D9_meta.h>
117
+ //#include <generated_cudaD3D10_meta.h>
118
+ //#include <generated_cudaD3D11_meta.h>
119
+ #endif
120
+
121
+ #endif /*_CUPTI_H_*/
122
+
123
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_activity.h ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_activity_deprecated.h ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_callbacks.h ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__CUPTI_CALLBACKS_H__)
51
+ #define __CUPTI_CALLBACKS_H__
52
+
53
+ #include <cuda.h>
54
+ #include <builtin_types.h>
55
+ #include <string.h>
56
+ #include <cuda_stdint.h>
57
+ #include <cupti_result.h>
58
+
59
+ #ifndef CUPTIAPI
60
+ #ifdef _WIN32
61
+ #define CUPTIAPI __stdcall
62
+ #else
63
+ #define CUPTIAPI
64
+ #endif
65
+ #endif
66
+
67
+ #if defined(__cplusplus)
68
+ extern "C" {
69
+ #endif
70
+
71
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
72
+ #pragma GCC visibility push(default)
73
+ #endif
74
+
75
+ /**
76
+ * \defgroup CUPTI_CALLBACK_API CUPTI Callback API
77
+ * Functions, types, and enums that implement the CUPTI Callback API.
78
+ * @{
79
+ */
80
+
81
+ /**
82
+ * \brief Specifies the point in an API call that a callback is issued.
83
+ *
84
+ * Specifies the point in an API call that a callback is issued. This
85
+ * value is communicated to the callback function via \ref
86
+ * CUpti_CallbackData::callbackSite.
87
+ */
88
+ typedef enum {
89
+ /**
90
+ * The callback is at the entry of the API call.
91
+ */
92
+ CUPTI_API_ENTER = 0,
93
+ /**
94
+ * The callback is at the exit of the API call.
95
+ */
96
+ CUPTI_API_EXIT = 1,
97
+ CUPTI_API_CBSITE_FORCE_INT = 0x7fffffff
98
+ } CUpti_ApiCallbackSite;
99
+
100
+ /**
101
+ * \brief Callback domains.
102
+ *
103
+ * Callback domains. Each domain represents callback points for a
104
+ * group of related API functions or CUDA driver activity.
105
+ */
106
+ typedef enum {
107
+ /**
108
+ * Invalid domain.
109
+ */
110
+ CUPTI_CB_DOMAIN_INVALID = 0,
111
+ /**
112
+ * Domain containing callback points for all driver API functions.
113
+ */
114
+ CUPTI_CB_DOMAIN_DRIVER_API = 1,
115
+ /**
116
+ * Domain containing callback points for all runtime API
117
+ * functions.
118
+ */
119
+ CUPTI_CB_DOMAIN_RUNTIME_API = 2,
120
+ /**
121
+ * Domain containing callback points for CUDA resource tracking.
122
+ */
123
+ CUPTI_CB_DOMAIN_RESOURCE = 3,
124
+ /**
125
+ * Domain containing callback points for CUDA synchronization.
126
+ */
127
+ CUPTI_CB_DOMAIN_SYNCHRONIZE = 4,
128
+ /**
129
+ * Domain containing callback points for NVTX API functions.
130
+ */
131
+ CUPTI_CB_DOMAIN_NVTX = 5,
132
+ /**
133
+ * Domain containing callback points for various states.
134
+ */
135
+ CUPTI_CB_DOMAIN_STATE = 6,
136
+
137
+ CUPTI_CB_DOMAIN_SIZE,
138
+
139
+ CUPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff
140
+ } CUpti_CallbackDomain;
141
+
142
+ /**
143
+ * \brief Callback IDs for resource domain.
144
+ *
145
+ * Callback IDs for resource domain, CUPTI_CB_DOMAIN_RESOURCE. This
146
+ * value is communicated to the callback function via the \p cbid
147
+ * parameter.
148
+ */
149
+ typedef enum {
150
+ /**
151
+ * Invalid resource callback ID.
152
+ */
153
+ CUPTI_CBID_RESOURCE_INVALID = 0,
154
+ /**
155
+ * A new context has been created.
156
+ */
157
+ CUPTI_CBID_RESOURCE_CONTEXT_CREATED = 1,
158
+ /**
159
+ * A context is about to be destroyed.
160
+ */
161
+ CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING = 2,
162
+ /**
163
+ * A new stream has been created.
164
+ */
165
+ CUPTI_CBID_RESOURCE_STREAM_CREATED = 3,
166
+ /**
167
+ * A stream is about to be destroyed.
168
+ */
169
+ CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING = 4,
170
+ /**
171
+ * The driver has finished initializing.
172
+ */
173
+ CUPTI_CBID_RESOURCE_CU_INIT_FINISHED = 5,
174
+ /**
175
+ * A module has been loaded.
176
+ */
177
+ CUPTI_CBID_RESOURCE_MODULE_LOADED = 6,
178
+ /**
179
+ * A module is about to be unloaded.
180
+ */
181
+ CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING = 7,
182
+ /**
183
+ * The current module which is being profiled.
184
+ */
185
+ CUPTI_CBID_RESOURCE_MODULE_PROFILED = 8,
186
+ /**
187
+ * CUDA graph has been created.
188
+ */
189
+ CUPTI_CBID_RESOURCE_GRAPH_CREATED = 9,
190
+ /**
191
+ * CUDA graph is about to be destroyed.
192
+ */
193
+ CUPTI_CBID_RESOURCE_GRAPH_DESTROY_STARTING = 10,
194
+ /**
195
+ * CUDA graph is cloned.
196
+ */
197
+ CUPTI_CBID_RESOURCE_GRAPH_CLONED = 11,
198
+ /**
199
+ * CUDA graph node is about to be created
200
+ */
201
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CREATE_STARTING = 12,
202
+ /**
203
+ * CUDA graph node is created.
204
+ */
205
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED = 13,
206
+ /**
207
+ * CUDA graph node is about to be destroyed.
208
+ */
209
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DESTROY_STARTING = 14,
210
+ /**
211
+ * Dependency on a CUDA graph node is created.
212
+ */
213
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_CREATED = 15,
214
+ /**
215
+ * Dependency on a CUDA graph node is destroyed.
216
+ */
217
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_DESTROY_STARTING = 16,
218
+ /**
219
+ * An executable CUDA graph is about to be created.
220
+ */
221
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATE_STARTING = 17,
222
+ /**
223
+ * An executable CUDA graph is created.
224
+ */
225
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATED = 18,
226
+ /**
227
+ * An executable CUDA graph is about to be destroyed.
228
+ */
229
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_DESTROY_STARTING = 19,
230
+ /**
231
+ * CUDA graph node is cloned.
232
+ */
233
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED = 20,
234
+ /**
235
+ * CUDA stream attribute is changed.
236
+ */
237
+ CUPTI_CBID_RESOURCE_STREAM_ATTRIBUTE_CHANGED = 21,
238
+
239
+ CUPTI_CBID_RESOURCE_SIZE,
240
+ CUPTI_CBID_RESOURCE_FORCE_INT = 0x7fffffff
241
+ } CUpti_CallbackIdResource;
242
+
243
+ /**
244
+ * \brief Callback IDs for synchronization domain.
245
+ *
246
+ * Callback IDs for synchronization domain,
247
+ * CUPTI_CB_DOMAIN_SYNCHRONIZE. This value is communicated to the
248
+ * callback function via the \p cbid parameter.
249
+ */
250
+ typedef enum {
251
+ /**
252
+ * Invalid synchronize callback ID.
253
+ */
254
+ CUPTI_CBID_SYNCHRONIZE_INVALID = 0,
255
+ /**
256
+ * Stream synchronization has completed for the stream.
257
+ */
258
+ CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED = 1,
259
+ /**
260
+ * Context synchronization has completed for the context.
261
+ */
262
+ CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED = 2,
263
+ CUPTI_CBID_SYNCHRONIZE_SIZE,
264
+ CUPTI_CBID_SYNCHRONIZE_FORCE_INT = 0x7fffffff
265
+ } CUpti_CallbackIdSync;
266
+
267
+
268
+ /**
269
+ * \brief Callback IDs for state domain.
270
+ *
271
+ * Callback IDs for state domain,
272
+ * CUPTI_CB_DOMAIN_STATE. This value is communicated to the
273
+ * callback function via the \p cbid parameter.
274
+ */
275
+ typedef enum {
276
+ /**
277
+ * Invalid state callback ID.
278
+ */
279
+ CUPTI_CBID_STATE_INVALID = 0,
280
+ /**
281
+ * Notification of fatal errors - high impact, non-recoverable.
282
+ * When encountered, CUPTI automatically invokes cuptiFinalize().
283
+ * User can control behavior of the application in future from
284
+ * receiving this callback - such as continuing without profiling, or
285
+ * terminating the whole application.
286
+ */
287
+ CUPTI_CBID_STATE_FATAL_ERROR = 1,
288
+ /**
289
+ * Notification of non-fatal errors - high impact, but recoverable.
290
+ * This notification is not issued in the current release.
291
+ */
292
+ CUPTI_CBID_STATE_ERROR = 2,
293
+ /**
294
+ * Notification of warnings - low impact, recoverable.
295
+ */
296
+ CUPTI_CBID_STATE_WARNING = 3,
297
+
298
+ CUPTI_CBID_STATE_SIZE,
299
+ CUPTI_CBID_STATE_FORCE_INT = 0x7fffffff
300
+ } CUpti_CallbackIdState;
301
+
302
+
303
+ /**
304
+ * \brief Data passed into a runtime or driver API callback function.
305
+ *
306
+ * Data passed into a runtime or driver API callback function as the
307
+ * \p cbdata argument to \ref CUpti_CallbackFunc. The \p cbdata will
308
+ * be this type for \p domain equal to CUPTI_CB_DOMAIN_DRIVER_API or
309
+ * CUPTI_CB_DOMAIN_RUNTIME_API. The callback data is valid only within
310
+ * the invocation of the callback function that is passed the data. If
311
+ * you need to retain some data for use outside of the callback, you
312
+ * must make a copy of that data. For example, if you make a shallow
313
+ * copy of CUpti_CallbackData within a callback, you cannot
314
+ * dereference \p functionParams outside of that callback to access
315
+ * the function parameters. \p functionName is an exception: the
316
+ * string pointed to by \p functionName is a global constant and so
317
+ * may be accessed outside of the callback.
318
+ */
319
+ typedef struct {
320
+ /**
321
+ * Point in the runtime or driver function from where the callback
322
+ * was issued.
323
+ */
324
+ CUpti_ApiCallbackSite callbackSite;
325
+
326
+ /**
327
+ * Name of the runtime or driver API function which issued the
328
+ * callback. This string is a global constant and so may be
329
+ * accessed outside of the callback.
330
+ */
331
+ const char *functionName;
332
+
333
+ /**
334
+ * Pointer to the arguments passed to the runtime or driver API
335
+ * call. See generated_cuda_runtime_api_meta.h and
336
+ * generated_cuda_meta.h for structure definitions for the
337
+ * parameters for each runtime and driver API function.
338
+ */
339
+ const void *functionParams;
340
+
341
+ /**
342
+ * Pointer to the return value of the runtime or driver API
343
+ * call. This field is only valid within the CUPTI_API_EXIT
344
+ * callback. For a runtime API \p functionReturnValue points to a
345
+ * \p cudaError_t. For a driver API \p functionReturnValue points
346
+ * to a \p CUresult.
347
+ */
348
+ void *functionReturnValue;
349
+
350
+ /**
351
+ * Name of the symbol operated on by the runtime or driver API
352
+ * function which issued the callback. This entry is valid only for
353
+ * driver and runtime launch callbacks, where it returns the name of
354
+ * the kernel.
355
+ */
356
+ const char *symbolName;
357
+
358
+ /**
359
+ * Driver context current to the thread, or null if no context is
360
+ * current. This value can change from the entry to exit callback
361
+ * of a runtime API function if the runtime initializes a context.
362
+ */
363
+ CUcontext context;
364
+
365
+ /**
366
+ * Unique ID for the CUDA context associated with the thread. The
367
+ * UIDs are assigned sequentially as contexts are created and are
368
+ * unique within a process.
369
+ */
370
+ uint32_t contextUid;
371
+
372
+ /**
373
+ * Pointer to data shared between the entry and exit callbacks of
374
+ * a given runtime or driver API function invocation. This field
375
+ * can be used to pass 64-bit values from the entry callback to
376
+ * the corresponding exit callback.
377
+ */
378
+ uint64_t *correlationData;
379
+
380
+ /**
381
+ * The activity record correlation ID for this callback. For a
382
+ * driver domain callback (i.e. \p domain
383
+ * CUPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID
384
+ * in the CUpti_ActivityAPI record corresponding to the CUDA driver
385
+ * function call. For a runtime domain callback (i.e. \p domain
386
+ * CUPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation
387
+ * ID in the CUpti_ActivityAPI record corresponding to the CUDA
388
+ * runtime function call. Within the callback, this ID can be
389
+ * recorded to correlate user data with the activity record. This
390
+ * field is new in 4.1.
391
+ */
392
+ uint32_t correlationId;
393
+
394
+ } CUpti_CallbackData;
395
+
396
+ /**
397
+ * \brief Data passed into a resource callback function.
398
+ *
399
+ * Data passed into a resource callback function as the \p cbdata
400
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
401
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The callback
402
+ * data is valid only within the invocation of the callback function
403
+ * that is passed the data. If you need to retain some data for use
404
+ * outside of the callback, you must make a copy of that data.
405
+ */
406
+ typedef struct {
407
+ /**
408
+ * For CUPTI_CBID_RESOURCE_CONTEXT_CREATED and
409
+ * CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, the context being
410
+ * created or destroyed. For CUPTI_CBID_RESOURCE_STREAM_CREATED and
411
+ * CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the context
412
+ * containing the stream being created or destroyed.
413
+ */
414
+ CUcontext context;
415
+
416
+ union {
417
+ /**
418
+ * For CUPTI_CBID_RESOURCE_STREAM_CREATED and
419
+ * CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the stream being
420
+ * created or destroyed.
421
+ */
422
+ CUstream stream;
423
+ } resourceHandle;
424
+
425
+ /**
426
+ * Reserved for future use.
427
+ */
428
+ void *resourceDescriptor;
429
+ } CUpti_ResourceData;
430
+
431
+
432
+ /**
433
+ * \brief Module data passed into a resource callback function.
434
+ *
435
+ * CUDA module data passed into a resource callback function as the \p cbdata
436
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
437
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The module
438
+ * data is valid only within the invocation of the callback function
439
+ * that is passed the data. If you need to retain some data for use
440
+ * outside of the callback, you must make a copy of that data.
441
+ */
442
+
443
+ typedef struct {
444
+ /**
445
+ * Identifier to associate with the CUDA module.
446
+ */
447
+ uint32_t moduleId;
448
+
449
+ /**
450
+ * The size of the cubin.
451
+ */
452
+ size_t cubinSize;
453
+
454
+ /**
455
+ * Pointer to the associated cubin.
456
+ */
457
+ const char *pCubin;
458
+ } CUpti_ModuleResourceData;
459
+
460
+ /**
461
+ * \brief CUDA graphs data passed into a resource callback function.
462
+ *
463
+ * CUDA graphs data passed into a resource callback function as the \p cbdata
464
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
465
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The graph
466
+ * data is valid only within the invocation of the callback function
467
+ * that is passed the data. If you need to retain some data for use
468
+ * outside of the callback, you must make a copy of that data.
469
+ */
470
+
471
+ typedef struct {
472
+ /**
473
+ * CUDA graph
474
+ */
475
+ CUgraph graph;
476
+ /**
477
+ * The original CUDA graph from which \p graph is cloned
478
+ */
479
+ CUgraph originalGraph;
480
+ /**
481
+ * CUDA graph node
482
+ */
483
+ CUgraphNode node;
484
+ /**
485
+ * The original CUDA graph node from which \p node is cloned
486
+ */
487
+ CUgraphNode originalNode;
488
+ /**
489
+ * Type of the \p node
490
+ */
491
+ CUgraphNodeType nodeType;
492
+ /**
493
+ * The dependent graph node
494
+ * The size of the array is \param numDependencies.
495
+ */
496
+ CUgraphNode dependency;
497
+ /**
498
+ * CUDA executable graph
499
+ */
500
+ CUgraphExec graphExec;
501
+ } CUpti_GraphData;
502
+
503
+ /**
504
+ * \brief Data passed into a synchronize callback function.
505
+ *
506
+ * Data passed into a synchronize callback function as the \p cbdata
507
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
508
+ * type for \p domain equal to CUPTI_CB_DOMAIN_SYNCHRONIZE. The
509
+ * callback data is valid only within the invocation of the callback
510
+ * function that is passed the data. If you need to retain some data
511
+ * for use outside of the callback, you must make a copy of that data.
512
+ */
513
+ typedef struct {
514
+ /**
515
+ * The context of the stream being synchronized.
516
+ */
517
+ CUcontext context;
518
+ /**
519
+ * The stream being synchronized.
520
+ */
521
+ CUstream stream;
522
+ } CUpti_SynchronizeData;
523
+
524
+ /**
525
+ * \brief Data passed into a NVTX callback function.
526
+ *
527
+ * Data passed into a NVTX callback function as the \p cbdata argument
528
+ * to \ref CUpti_CallbackFunc. The \p cbdata will be this type for \p
529
+ * domain equal to CUPTI_CB_DOMAIN_NVTX. Unless otherwise noted, the
530
+ * callback data is valid only within the invocation of the callback
531
+ * function that is passed the data. If you need to retain some data
532
+ * for use outside of the callback, you must make a copy of that data.
533
+ */
534
+ typedef struct {
535
+ /**
536
+ * Name of the NVTX API function which issued the callback. This
537
+ * string is a global constant and so may be accessed outside of the
538
+ * callback.
539
+ */
540
+ const char *functionName;
541
+
542
+ /**
543
+ * Pointer to the arguments passed to the NVTX API call. See
544
+ * generated_nvtx_meta.h for structure definitions for the
545
+ * parameters for each NVTX API function.
546
+ */
547
+ const void *functionParams;
548
+
549
+ /**
550
+ * Pointer to the return value of the NVTX API call. See
551
+ * nvToolsExt.h for each NVTX API function's return value.
552
+ */
553
+ const void *functionReturnValue;
554
+ } CUpti_NvtxData;
555
+
556
+ /**
557
+ * \brief Stream attribute data passed into a resource callback function
558
+ * for CUPTI_CBID_RESOURCE_STREAM_ATTRIBUTE_CHANGED callback
559
+ *
560
+ * Data passed into a resource callback function as the \p cbdata
561
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
562
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The
563
+ * stream attribute data is valid only within the invocation of the callback
564
+ * function that is passed the data. If you need to retain some data
565
+ * for use outside of the callback, you must make a copy of that data.
566
+ */
567
+ typedef struct {
568
+ /**
569
+ * The CUDA stream handle for the attribute
570
+ */
571
+ CUstream stream;
572
+
573
+ /**
574
+ * The type of the CUDA stream attribute
575
+ */
576
+ CUstreamAttrID attr;
577
+
578
+ /**
579
+ * The value of the CUDA stream attribute
580
+ */
581
+ const CUstreamAttrValue *value;
582
+ } CUpti_StreamAttrData;
583
+
584
+ /**
585
+ * \brief Data passed into a State callback function.
586
+ *
587
+ * Data passed into a State callback function as the \p cbdata argument
588
+ * to \ref CUpti_CallbackFunc. The \p cbdata will be this type for \p
589
+ * domain equal to CUPTI_CB_DOMAIN_STATE and callback Ids belonging to CUpti_CallbackIdState.
590
+ * Unless otherwise noted, the callback data is valid only within the invocation of the callback
591
+ * function that is passed the data. If you need to retain some data
592
+ * for use outside of the callback, you must make a copy of that data.
593
+ */
594
+ typedef struct {
595
+ union {
596
+ /**
597
+ * Data passed along with the callback Ids
598
+ * Enum CUpti_CallbackIdState used to denote callback ids
599
+ */
600
+ struct {
601
+ /**
602
+ * Error code
603
+ */
604
+ CUptiResult result;
605
+ /**
606
+ * String containing more details. It can be NULL.
607
+ */
608
+ const char *message;
609
+ } notification;
610
+ };
611
+ } CUpti_StateData;
612
+
613
+ /**
614
+ * \brief An ID for a driver API, runtime API, resource or
615
+ * synchronization callback.
616
+ *
617
+ * An ID for a driver API, runtime API, resource or synchronization
618
+ * callback. Within a driver API callback this should be interpreted
619
+ * as a CUpti_driver_api_trace_cbid value (these values are defined in
620
+ * cupti_driver_cbid.h). Within a runtime API callback this should be
621
+ * interpreted as a CUpti_runtime_api_trace_cbid value (these values
622
+ * are defined in cupti_runtime_cbid.h). Within a resource API
623
+ * callback this should be interpreted as a \ref
624
+ * CUpti_CallbackIdResource value. Within a synchronize API callback
625
+ * this should be interpreted as a \ref CUpti_CallbackIdSync value.
626
+ */
627
+ typedef uint32_t CUpti_CallbackId;
628
+
629
+ /**
630
+ * \brief Function type for a callback.
631
+ *
632
+ * Function type for a callback. The type of the data passed to the
633
+ * callback in \p cbdata depends on the \p domain. If \p domain is
634
+ * CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API the type
635
+ * of \p cbdata will be CUpti_CallbackData. If \p domain is
636
+ * CUPTI_CB_DOMAIN_RESOURCE the type of \p cbdata will be
637
+ * CUpti_ResourceData. If \p domain is CUPTI_CB_DOMAIN_SYNCHRONIZE the
638
+ * type of \p cbdata will be CUpti_SynchronizeData. If \p domain is
639
+ * CUPTI_CB_DOMAIN_NVTX the type of \p cbdata will be CUpti_NvtxData. If \p domain is CUPTI_CB_DOMAIN_STATE the type of \p cbdata will be CUpti_StateData.
640
+ *
641
+ * \param userdata User data supplied at subscription of the callback
642
+ * \param domain The domain of the callback
643
+ * \param cbid The ID of the callback
644
+ * \param cbdata Data passed to the callback.
645
+ */
646
+ typedef void (CUPTIAPI *CUpti_CallbackFunc)(
647
+ void *userdata,
648
+ CUpti_CallbackDomain domain,
649
+ CUpti_CallbackId cbid,
650
+ const void *cbdata);
651
+
652
+ /**
653
+ * \brief A callback subscriber.
654
+ */
655
+ typedef struct CUpti_Subscriber_st *CUpti_SubscriberHandle;
656
+
657
+ /**
658
+ * \brief Pointer to an array of callback domains.
659
+ */
660
+ typedef CUpti_CallbackDomain *CUpti_DomainTable;
661
+
662
+ /**
663
+ * \brief Get the available callback domains.
664
+ *
665
+ * Returns in \p *domainTable an array of size \p *domainCount of all
666
+ * the available callback domains.
667
+ * \note \b Thread-safety: this function is thread safe.
668
+ *
669
+ * \param domainCount Returns number of callback domains
670
+ * \param domainTable Returns pointer to array of available callback domains
671
+ *
672
+ * \retval CUPTI_SUCCESS on success
673
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
674
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p domainCount or \p domainTable are NULL
675
+ */
676
+ CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount,
677
+ CUpti_DomainTable *domainTable);
678
+
679
+ /**
680
+ * \brief Initialize a callback subscriber with a callback function
681
+ * and user data.
682
+ *
683
+ * Initializes a callback subscriber with a callback function and
684
+ * (optionally) a pointer to user data. The returned subscriber handle
685
+ * can be used to enable and disable the callback for specific domains
686
+ * and callback IDs.
687
+ * \note Only a single subscriber can be registered at a time. To ensure
688
+ * that no other CUPTI client interrupts the profiling session, it's the
689
+ * responsibility of all the CUPTI clients to call this function before
690
+ * starting the profiling session. In case profiling session is already
691
+ * started by another CUPTI client, this function returns the error code
692
+ * CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED.
693
+ * Note that this function returns the same error when application is
694
+ * launched using NVIDIA tools like nvprof, Visual Profiler, Nsight Systems,
695
+ * Nsight Compute, cuda-gdb and cuda-memcheck.
696
+ * \note This function does not enable any callbacks.
697
+ * \note \b Thread-safety: this function is thread safe.
698
+ *
699
+ * \param subscriber Returns handle to the initialized subscriber
700
+ * \param callback The callback function
701
+ * \param userdata A pointer to user data. This data will be passed to
702
+ * the callback function via the \p userdata parameter.
703
+ *
704
+ * \retval CUPTI_SUCCESS on success
705
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
706
+ * \retval CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is already a CUPTI subscriber
707
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is NULL
708
+ */
709
+ CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber,
710
+ CUpti_CallbackFunc callback,
711
+ void *userdata);
712
+
713
+ /**
714
+ * \brief Unregister a callback subscriber.
715
+ *
716
+ * Removes a callback subscriber so that no future callbacks will be
717
+ * issued to that subscriber.
718
+ * \note \b Thread-safety: this function is thread safe.
719
+ *
720
+ * \param subscriber Handle to the initialized subscriber
721
+ *
722
+ * \retval CUPTI_SUCCESS on success
723
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
724
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is NULL or not initialized
725
+ */
726
+ CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber);
727
+
728
+ /**
729
+ * \brief Get the current enabled/disabled state of a callback for a specific
730
+ * domain and function ID.
731
+ *
732
+ * Returns non-zero in \p *enable if the callback for a domain and
733
+ * callback ID is enabled, and zero if not enabled.
734
+ *
735
+ * \note \b Thread-safety: a subscriber must serialize access to
736
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
737
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
738
+ * d, c) and cuptiEnableCallback(sub, d, c) are called concurrently,
739
+ * the results are undefined.
740
+ *
741
+ * \param enable Returns non-zero if callback enabled, zero if not enabled
742
+ * \param subscriber Handle to the initialized subscriber
743
+ * \param domain The domain of the callback
744
+ * \param cbid The ID of the callback
745
+ *
746
+ * \retval CUPTI_SUCCESS on success
747
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
748
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p enable is NULL, or if \p
749
+ * subscriber, \p domain or \p cbid is invalid.
750
+ */
751
+ CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable,
752
+ CUpti_SubscriberHandle subscriber,
753
+ CUpti_CallbackDomain domain,
754
+ CUpti_CallbackId cbid);
755
+
756
+ /**
757
+ * \brief Enable or disable callbacks for a specific domain and
758
+ * callback ID.
759
+ *
760
+ * Enable or disable callbacks for a subscriber for a specific domain
761
+ * and callback ID.
762
+ *
763
+ * \note \b Thread-safety: a subscriber must serialize access to
764
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
765
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
766
+ * d, c) and cuptiEnableCallback(sub, d, c) are called concurrently,
767
+ * the results are undefined.
768
+ *
769
+ * \param enable New enable state for the callback. Zero disables the
770
+ * callback, non-zero enables the callback.
771
+ * \param subscriber - Handle to callback subscription
772
+ * \param domain The domain of the callback
773
+ * \param cbid The ID of the callback
774
+ *
775
+ * \retval CUPTI_SUCCESS on success
776
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
777
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber, \p domain or \p
778
+ * cbid is invalid.
779
+ */
780
+ CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable,
781
+ CUpti_SubscriberHandle subscriber,
782
+ CUpti_CallbackDomain domain,
783
+ CUpti_CallbackId cbid);
784
+
785
+ /**
786
+ * \brief Enable or disable all callbacks for a specific domain.
787
+ *
788
+ * Enable or disable all callbacks for a specific domain.
789
+ *
790
+ * \note \b Thread-safety: a subscriber must serialize access to
791
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
792
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
793
+ * d, *) and cuptiEnableDomain(sub, d) are called concurrently, the
794
+ * results are undefined.
795
+ *
796
+ * \param enable New enable state for all callbacks in the
797
+ * domain. Zero disables all callbacks, non-zero enables all
798
+ * callbacks.
799
+ * \param subscriber - Handle to callback subscription
800
+ * \param domain The domain of the callback
801
+ *
802
+ * \retval CUPTI_SUCCESS on success
803
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
804
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber or \p domain is invalid
805
+ */
806
+ CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable,
807
+ CUpti_SubscriberHandle subscriber,
808
+ CUpti_CallbackDomain domain);
809
+
810
+ /**
811
+ * \brief Enable or disable all callbacks in all domains.
812
+ *
813
+ * Enable or disable all callbacks in all domains.
814
+ *
815
+ * \note \b Thread-safety: a subscriber must serialize access to
816
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
817
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
818
+ * d, *) and cuptiEnableAllDomains(sub) are called concurrently, the
819
+ * results are undefined.
820
+ *
821
+ * \param enable New enable state for all callbacks in all
822
+ * domains. Zero disables all callbacks, non-zero enables all
823
+ * callbacks.
824
+ * \param subscriber - Handle to callback subscription
825
+ *
826
+ * \retval CUPTI_SUCCESS on success
827
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
828
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is invalid
829
+ */
830
+ CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable,
831
+ CUpti_SubscriberHandle subscriber);
832
+
833
+ /**
834
+ * \brief Get the name of a callback for a specific domain and callback ID.
835
+ *
836
+ * Returns a pointer to the name c_string in \p **name.
837
+ *
838
+ * \note \b Names are available only for the DRIVER and RUNTIME domains.
839
+ *
840
+ * \param domain The domain of the callback
841
+ * \param cbid The ID of the callback
842
+ * \param name Returns pointer to the name string on success, NULL otherwise
843
+ *
844
+ * \retval CUPTI_SUCCESS on success
845
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p name is NULL, or if
846
+ * \p domain or \p cbid is invalid.
847
+ */
848
+ CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain,
849
+ uint32_t cbid,
850
+ const char **name);
851
+
852
+ /** @} */ /* END CUPTI_CALLBACK_API */
853
+
854
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
855
+ #pragma GCC visibility pop
856
+ #endif
857
+
858
+ #if defined(__cplusplus)
859
+ }
860
+ #endif
861
+
862
+ #endif // file guard
863
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_checkpoint.h ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <cuda.h>
4
+ #include <cupti_result.h>
5
+
6
+ #include <stddef.h>
7
+ #include <stdint.h>
8
+
9
+ namespace NV { namespace Cupti { namespace Checkpoint {
10
+
11
+ #ifdef __cplusplus
12
+ extern "C"
13
+ {
14
+ #endif
15
+
16
+ /**
17
+ * \defgroup CUPTI_CHECKPOINT_API CUPTI Checkpoint API
18
+ * Functions, types, and enums that implement the CUPTI Checkpoint API.
19
+ * @{
20
+ */
21
+
22
+ /**
23
+ * \brief Specifies optimization options for a checkpoint, may be OR'd together to specify multiple options.
24
+ */
25
+ typedef enum
26
+ {
27
+ CUPTI_CHECKPOINT_OPT_NONE = 0, //!< Default behavior
28
+ CUPTI_CHECKPOINT_OPT_TRANSFER = 1, //!< Determine which mem blocks have changed, and only restore those. This optimization is cached, which means cuptiCheckpointRestore must always be called at the same point in the application when this option is enabled, or the result may be incorrect.
29
+ } CUpti_CheckpointOptimizations;
30
+
31
+ /**
32
+ * \brief Configuration and handle for a CUPTI Checkpoint
33
+ *
34
+ * A CUpti_Checkpoint object should be initialized with desired options prior to passing into any
35
+ * CUPTI Checkpoint API function. The first call into a Checkpoint API function will initialize internal
36
+ * state based on these options. Subsequent changes to these options will not have any effect.
37
+ *
38
+ * Checkpoint data is saved in device, host, and filesystem space. There are options to reserve memory
39
+ * at each level (device, host, filesystem) which are intended to allow a guarantee that a certain amount
40
+ * of memory will remain free for use after the checkpoint is saved.
41
+ * Note, however, that falling back to slower levels of memory (host, and then filesystem) to save the checkpoint
42
+ * will result in performance degradation.
43
+ * Currently, the filesystem limitation is not implemented. Note that falling back to filesystem storage may
44
+ * significantly impact the performance for saving and restoring a checkpoint.
45
+ */
46
+ typedef struct
47
+ {
48
+ size_t structSize; //!< [in] Must be set to CUpti_Checkpoint_STRUCT_SIZE
49
+
50
+ CUcontext ctx; //!< [in] Set to context to save from, or will use current context if NULL
51
+
52
+ size_t reserveDeviceMB; //!< [in] Restrict checkpoint from using last N MB of device memory (-1 = use no device memory)
53
+ size_t reserveHostMB; //!< [in] Restrict checkpoint from using last N MB of host memory (-1 = use no host memory)
54
+ uint8_t allowOverwrite; //!< [in] Boolean, Allow checkpoint to save over existing checkpoint
55
+ uint8_t optimizations; //!< [in] Mask of CUpti_CheckpointOptimizations flags for this checkpoint
56
+
57
+ void * pPriv; //!< [in] Assign to NULL
58
+ } CUpti_Checkpoint;
59
+
60
+ #define CUpti_Checkpoint_STRUCT_SIZE \
61
+ (offsetof(CUpti_Checkpoint, pPriv) + \
62
+ sizeof(((CUpti_Checkpoint*)(nullptr))->pPriv))
63
+
64
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
65
+ #pragma GCC visibility push(default)
66
+ #endif
67
+
68
+ /**
69
+ * \brief Initialize and save a checkpoint of the device state associated with the handle context
70
+ *
71
+ * Uses the handle options to configure and save a checkpoint of the device state associated with the specified context.
72
+ *
73
+ * \param handle A pointer to a CUpti_Checkpoint object
74
+ *
75
+ * \retval CUPTI_SUCCESS if a checkpoint was successfully initialized and saved
76
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p handle does not appear to refer to a valid CUpti_Checkpoint
77
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
78
+ * \retval CUPTI_ERROR_INVALID_DEVICE if device associated with context is not compatible with checkpoint API
79
+ * \retval CUPTI_ERROR_INVALID_OPERATION if Save is requested over an existing checkpoint, but \p allowOverwrite was not originally specified
80
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY if as configured, not enough backing storage space to save the checkpoint
81
+ */
82
+ CUptiResult cuptiCheckpointSave(CUpti_Checkpoint * const handle);
83
+
84
+ /**
85
+ * \brief Restore a checkpoint to the device associated with its context
86
+ *
87
+ * Restores device, pinned, and allocated memory to the state when the checkpoint was saved
88
+ *
89
+ * \param handle A pointer to a previously saved CUpti_Checkpoint object
90
+ *
91
+ * \retval CUPTI_SUCCESS if the checkpoint was successfully restored
92
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if the checkpoint was not previously initialized
93
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
94
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if the handle appears invalid
95
+ * \retval CUPTI_ERROR_UNKNOWN if the restore or optimization operation fails
96
+ */
97
+ CUptiResult cuptiCheckpointRestore(CUpti_Checkpoint * const handle);
98
+
99
+ /**
100
+ * \brief Free the backing data for a checkpoint
101
+ *
102
+ * Frees all associated device, host memory and filesystem storage used for this context.
103
+ * After freeing a handle, it may be re-used as if it was new - options may be re-configured and will
104
+ * take effect on the next call to \p cuptiCheckpointSave.
105
+ *
106
+ * \param handle A pointer to a previously saved CUpti_Checkpoint object
107
+ *
108
+ * \retval CUPTI_SUCCESS if the handle was successfully freed
109
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if the handle was already freed or appears invalid
110
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if the context is no longer valid
111
+ */
112
+ CUptiResult cuptiCheckpointFree(CUpti_Checkpoint * const handle);
113
+
114
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
115
+ #pragma GCC visibility pop
116
+ #endif
117
+
118
+ /**
119
+ * @}
120
+ */
121
+
122
+ #ifdef __cplusplus
123
+ }
124
+ #endif
125
+
126
+ // Exit namespace NV::Cupti::Checkpoint
127
+ }}}
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_common.h ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__CUPTI_COMMON_H__)
51
+ #define __CUPTI_COMMON_H__
52
+
53
+ #ifndef CUPTIAPI
54
+ #ifdef _WIN32
55
+ #define CUPTIAPI __stdcall
56
+ #else
57
+ #define CUPTIAPI
58
+ #endif
59
+ #endif
60
+
61
+ #ifndef CUPTIUTILAPI
62
+ #ifdef _WIN32
63
+ #define CUPTIUTILAPI __stdcall
64
+ #else
65
+ #define CUPTIUTILAPI
66
+ #endif
67
+ #endif
68
+
69
+ #if defined(__LP64__)
70
+ #define CUPTILP64 1
71
+ #elif defined(_WIN64)
72
+ #define CUPTILP64 1
73
+ #else
74
+ #undef CUPTILP64
75
+ #endif
76
+
77
+ #define ACTIVITY_RECORD_ALIGNMENT 8
78
+ #if defined(_WIN32) // Windows 32- and 64-bit
79
+ #define START_PACKED_ALIGNMENT __pragma(pack(push,1)) // exact fit - no padding
80
+ #define PACKED_ALIGNMENT __declspec(align(ACTIVITY_RECORD_ALIGNMENT))
81
+ #define END_PACKED_ALIGNMENT __pragma(pack(pop))
82
+ #elif defined(__GNUC__) // GCC
83
+ #define START_PACKED_ALIGNMENT
84
+ #define PACKED_ALIGNMENT __attribute__ ((__packed__)) __attribute__ ((aligned (ACTIVITY_RECORD_ALIGNMENT)))
85
+ #define END_PACKED_ALIGNMENT
86
+ #else // all other compilers
87
+ #define START_PACKED_ALIGNMENT
88
+ #define PACKED_ALIGNMENT
89
+ #define END_PACKED_ALIGNMENT
90
+ #endif
91
+
92
+ #endif /*__CUPTI_COMMON_H__*/
93
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_driver_cbid.h ADDED
@@ -0,0 +1,799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // *************************************************************************
3
+ // Definitions of indices for API functions, unique across entire API
4
+ // *************************************************************************
5
+
6
+ // This file is generated. Any changes you make will be lost during the next clean build.
7
+ // CUDA public interface, for type definitions and cu* function prototypes
8
+
9
+ #if !defined(_CUPTI_DRIVER_CBID_H_)
10
+ #define _CUPTI_DRIVER_CBID_H_
11
+
12
+ typedef enum CUpti_driver_api_trace_cbid_enum {
13
+ CUPTI_DRIVER_TRACE_CBID_INVALID = 0,
14
+ CUPTI_DRIVER_TRACE_CBID_cuInit = 1,
15
+ CUPTI_DRIVER_TRACE_CBID_cuDriverGetVersion = 2,
16
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGet = 3,
17
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetCount = 4,
18
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetName = 5,
19
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceComputeCapability = 6,
20
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceTotalMem = 7,
21
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetProperties = 8,
22
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetAttribute = 9,
23
+ CUPTI_DRIVER_TRACE_CBID_cuCtxCreate = 10,
24
+ CUPTI_DRIVER_TRACE_CBID_cuCtxDestroy = 11,
25
+ CUPTI_DRIVER_TRACE_CBID_cuCtxAttach = 12,
26
+ CUPTI_DRIVER_TRACE_CBID_cuCtxDetach = 13,
27
+ CUPTI_DRIVER_TRACE_CBID_cuCtxPushCurrent = 14,
28
+ CUPTI_DRIVER_TRACE_CBID_cuCtxPopCurrent = 15,
29
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetDevice = 16,
30
+ CUPTI_DRIVER_TRACE_CBID_cuCtxSynchronize = 17,
31
+ CUPTI_DRIVER_TRACE_CBID_cuModuleLoad = 18,
32
+ CUPTI_DRIVER_TRACE_CBID_cuModuleLoadData = 19,
33
+ CUPTI_DRIVER_TRACE_CBID_cuModuleLoadDataEx = 20,
34
+ CUPTI_DRIVER_TRACE_CBID_cuModuleLoadFatBinary = 21,
35
+ CUPTI_DRIVER_TRACE_CBID_cuModuleUnload = 22,
36
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetFunction = 23,
37
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetGlobal = 24,
38
+ CUPTI_DRIVER_TRACE_CBID_cu64ModuleGetGlobal = 25,
39
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetTexRef = 26,
40
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetInfo = 27,
41
+ CUPTI_DRIVER_TRACE_CBID_cu64MemGetInfo = 28,
42
+ CUPTI_DRIVER_TRACE_CBID_cuMemAlloc = 29,
43
+ CUPTI_DRIVER_TRACE_CBID_cu64MemAlloc = 30,
44
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocPitch = 31,
45
+ CUPTI_DRIVER_TRACE_CBID_cu64MemAllocPitch = 32,
46
+ CUPTI_DRIVER_TRACE_CBID_cuMemFree = 33,
47
+ CUPTI_DRIVER_TRACE_CBID_cu64MemFree = 34,
48
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetAddressRange = 35,
49
+ CUPTI_DRIVER_TRACE_CBID_cu64MemGetAddressRange = 36,
50
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocHost = 37,
51
+ CUPTI_DRIVER_TRACE_CBID_cuMemFreeHost = 38,
52
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostAlloc = 39,
53
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostGetDevicePointer = 40,
54
+ CUPTI_DRIVER_TRACE_CBID_cu64MemHostGetDevicePointer = 41,
55
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostGetFlags = 42,
56
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD = 43,
57
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyHtoD = 44,
58
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH = 45,
59
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyDtoH = 46,
60
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD = 47,
61
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyDtoD = 48,
62
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoA = 49,
63
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyDtoA = 50,
64
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoD = 51,
65
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyAtoD = 52,
66
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA = 53,
67
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoH = 54,
68
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoA = 55,
69
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2D = 56,
70
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned = 57,
71
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D = 58,
72
+ CUPTI_DRIVER_TRACE_CBID_cu64Memcpy3D = 59,
73
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync = 60,
74
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyHtoDAsync = 61,
75
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync = 62,
76
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyDtoHAsync = 63,
77
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync = 64,
78
+ CUPTI_DRIVER_TRACE_CBID_cu64MemcpyDtoDAsync = 65,
79
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync = 66,
80
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync = 67,
81
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync = 68,
82
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync = 69,
83
+ CUPTI_DRIVER_TRACE_CBID_cu64Memcpy3DAsync = 70,
84
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD8 = 71,
85
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD8 = 72,
86
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD16 = 73,
87
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD16 = 74,
88
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD32 = 75,
89
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD32 = 76,
90
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8 = 77,
91
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD2D8 = 78,
92
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16 = 79,
93
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD2D16 = 80,
94
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32 = 81,
95
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD2D32 = 82,
96
+ CUPTI_DRIVER_TRACE_CBID_cuFuncSetBlockShape = 83,
97
+ CUPTI_DRIVER_TRACE_CBID_cuFuncSetSharedSize = 84,
98
+ CUPTI_DRIVER_TRACE_CBID_cuFuncGetAttribute = 85,
99
+ CUPTI_DRIVER_TRACE_CBID_cuFuncSetCacheConfig = 86,
100
+ CUPTI_DRIVER_TRACE_CBID_cuArrayCreate = 87,
101
+ CUPTI_DRIVER_TRACE_CBID_cuArrayGetDescriptor = 88,
102
+ CUPTI_DRIVER_TRACE_CBID_cuArrayDestroy = 89,
103
+ CUPTI_DRIVER_TRACE_CBID_cuArray3DCreate = 90,
104
+ CUPTI_DRIVER_TRACE_CBID_cuArray3DGetDescriptor = 91,
105
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefCreate = 92,
106
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefDestroy = 93,
107
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetArray = 94,
108
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetAddress = 95,
109
+ CUPTI_DRIVER_TRACE_CBID_cu64TexRefSetAddress = 96,
110
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetAddress2D = 97,
111
+ CUPTI_DRIVER_TRACE_CBID_cu64TexRefSetAddress2D = 98,
112
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetFormat = 99,
113
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetAddressMode = 100,
114
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetFilterMode = 101,
115
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetFlags = 102,
116
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetAddress = 103,
117
+ CUPTI_DRIVER_TRACE_CBID_cu64TexRefGetAddress = 104,
118
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetArray = 105,
119
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetAddressMode = 106,
120
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetFilterMode = 107,
121
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetFormat = 108,
122
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetFlags = 109,
123
+ CUPTI_DRIVER_TRACE_CBID_cuParamSetSize = 110,
124
+ CUPTI_DRIVER_TRACE_CBID_cuParamSeti = 111,
125
+ CUPTI_DRIVER_TRACE_CBID_cuParamSetf = 112,
126
+ CUPTI_DRIVER_TRACE_CBID_cuParamSetv = 113,
127
+ CUPTI_DRIVER_TRACE_CBID_cuParamSetTexRef = 114,
128
+ CUPTI_DRIVER_TRACE_CBID_cuLaunch = 115,
129
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchGrid = 116,
130
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchGridAsync = 117,
131
+ CUPTI_DRIVER_TRACE_CBID_cuEventCreate = 118,
132
+ CUPTI_DRIVER_TRACE_CBID_cuEventRecord = 119,
133
+ CUPTI_DRIVER_TRACE_CBID_cuEventQuery = 120,
134
+ CUPTI_DRIVER_TRACE_CBID_cuEventSynchronize = 121,
135
+ CUPTI_DRIVER_TRACE_CBID_cuEventDestroy = 122,
136
+ CUPTI_DRIVER_TRACE_CBID_cuEventElapsedTime = 123,
137
+ CUPTI_DRIVER_TRACE_CBID_cuStreamCreate = 124,
138
+ CUPTI_DRIVER_TRACE_CBID_cuStreamQuery = 125,
139
+ CUPTI_DRIVER_TRACE_CBID_cuStreamSynchronize = 126,
140
+ CUPTI_DRIVER_TRACE_CBID_cuStreamDestroy = 127,
141
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsUnregisterResource = 128,
142
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsSubResourceGetMappedArray = 129,
143
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsResourceGetMappedPointer = 130,
144
+ CUPTI_DRIVER_TRACE_CBID_cu64GraphicsResourceGetMappedPointer = 131,
145
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsResourceSetMapFlags = 132,
146
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsMapResources = 133,
147
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsUnmapResources = 134,
148
+ CUPTI_DRIVER_TRACE_CBID_cuGetExportTable = 135,
149
+ CUPTI_DRIVER_TRACE_CBID_cuCtxSetLimit = 136,
150
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetLimit = 137,
151
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10GetDevice = 138,
152
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10CtxCreate = 139,
153
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsD3D10RegisterResource = 140,
154
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10RegisterResource = 141,
155
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10UnregisterResource = 142,
156
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10MapResources = 143,
157
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10UnmapResources = 144,
158
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceSetMapFlags = 145,
159
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedArray = 146,
160
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedPointer = 147,
161
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedSize = 148,
162
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedPitch = 149,
163
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetSurfaceDimensions = 150,
164
+ CUPTI_DRIVER_TRACE_CBID_cuD3D11GetDevice = 151,
165
+ CUPTI_DRIVER_TRACE_CBID_cuD3D11CtxCreate = 152,
166
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsD3D11RegisterResource = 153,
167
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9GetDevice = 154,
168
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9CtxCreate = 155,
169
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsD3D9RegisterResource = 156,
170
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9GetDirect3DDevice = 157,
171
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9RegisterResource = 158,
172
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9UnregisterResource = 159,
173
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9MapResources = 160,
174
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9UnmapResources = 161,
175
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceSetMapFlags = 162,
176
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetSurfaceDimensions = 163,
177
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedArray = 164,
178
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedPointer = 165,
179
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedSize = 166,
180
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedPitch = 167,
181
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9Begin = 168,
182
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9End = 169,
183
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9RegisterVertexBuffer = 170,
184
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9MapVertexBuffer = 171,
185
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9UnmapVertexBuffer = 172,
186
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9UnregisterVertexBuffer = 173,
187
+ CUPTI_DRIVER_TRACE_CBID_cuGLCtxCreate = 174,
188
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsGLRegisterBuffer = 175,
189
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsGLRegisterImage = 176,
190
+ CUPTI_DRIVER_TRACE_CBID_cuWGLGetDevice = 177,
191
+ CUPTI_DRIVER_TRACE_CBID_cuGLInit = 178,
192
+ CUPTI_DRIVER_TRACE_CBID_cuGLRegisterBufferObject = 179,
193
+ CUPTI_DRIVER_TRACE_CBID_cuGLMapBufferObject = 180,
194
+ CUPTI_DRIVER_TRACE_CBID_cuGLUnmapBufferObject = 181,
195
+ CUPTI_DRIVER_TRACE_CBID_cuGLUnregisterBufferObject = 182,
196
+ CUPTI_DRIVER_TRACE_CBID_cuGLSetBufferObjectMapFlags = 183,
197
+ CUPTI_DRIVER_TRACE_CBID_cuGLMapBufferObjectAsync = 184,
198
+ CUPTI_DRIVER_TRACE_CBID_cuGLUnmapBufferObjectAsync = 185,
199
+ CUPTI_DRIVER_TRACE_CBID_cuVDPAUGetDevice = 186,
200
+ CUPTI_DRIVER_TRACE_CBID_cuVDPAUCtxCreate = 187,
201
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsVDPAURegisterVideoSurface = 188,
202
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsVDPAURegisterOutputSurface = 189,
203
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetSurfRef = 190,
204
+ CUPTI_DRIVER_TRACE_CBID_cuSurfRefCreate = 191,
205
+ CUPTI_DRIVER_TRACE_CBID_cuSurfRefDestroy = 192,
206
+ CUPTI_DRIVER_TRACE_CBID_cuSurfRefSetFormat = 193,
207
+ CUPTI_DRIVER_TRACE_CBID_cuSurfRefSetArray = 194,
208
+ CUPTI_DRIVER_TRACE_CBID_cuSurfRefGetFormat = 195,
209
+ CUPTI_DRIVER_TRACE_CBID_cuSurfRefGetArray = 196,
210
+ CUPTI_DRIVER_TRACE_CBID_cu64DeviceTotalMem = 197,
211
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D10ResourceGetMappedPointer = 198,
212
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D10ResourceGetMappedSize = 199,
213
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D10ResourceGetMappedPitch = 200,
214
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D10ResourceGetSurfaceDimensions = 201,
215
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D9ResourceGetSurfaceDimensions = 202,
216
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D9ResourceGetMappedPointer = 203,
217
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D9ResourceGetMappedSize = 204,
218
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D9ResourceGetMappedPitch = 205,
219
+ CUPTI_DRIVER_TRACE_CBID_cu64D3D9MapVertexBuffer = 206,
220
+ CUPTI_DRIVER_TRACE_CBID_cu64GLMapBufferObject = 207,
221
+ CUPTI_DRIVER_TRACE_CBID_cu64GLMapBufferObjectAsync = 208,
222
+ CUPTI_DRIVER_TRACE_CBID_cuD3D11GetDevices = 209,
223
+ CUPTI_DRIVER_TRACE_CBID_cuD3D11CtxCreateOnDevice = 210,
224
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10GetDevices = 211,
225
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10CtxCreateOnDevice = 212,
226
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9GetDevices = 213,
227
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9CtxCreateOnDevice = 214,
228
+ CUPTI_DRIVER_TRACE_CBID_cu64MemHostAlloc = 215,
229
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD8Async = 216,
230
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD8Async = 217,
231
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD16Async = 218,
232
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD16Async = 219,
233
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD32Async = 220,
234
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD32Async = 221,
235
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8Async = 222,
236
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD2D8Async = 223,
237
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16Async = 224,
238
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD2D16Async = 225,
239
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32Async = 226,
240
+ CUPTI_DRIVER_TRACE_CBID_cu64MemsetD2D32Async = 227,
241
+ CUPTI_DRIVER_TRACE_CBID_cu64ArrayCreate = 228,
242
+ CUPTI_DRIVER_TRACE_CBID_cu64ArrayGetDescriptor = 229,
243
+ CUPTI_DRIVER_TRACE_CBID_cu64Array3DCreate = 230,
244
+ CUPTI_DRIVER_TRACE_CBID_cu64Array3DGetDescriptor = 231,
245
+ CUPTI_DRIVER_TRACE_CBID_cu64Memcpy2D = 232,
246
+ CUPTI_DRIVER_TRACE_CBID_cu64Memcpy2DUnaligned = 233,
247
+ CUPTI_DRIVER_TRACE_CBID_cu64Memcpy2DAsync = 234,
248
+ CUPTI_DRIVER_TRACE_CBID_cuCtxCreate_v2 = 235,
249
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10CtxCreate_v2 = 236,
250
+ CUPTI_DRIVER_TRACE_CBID_cuD3D11CtxCreate_v2 = 237,
251
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9CtxCreate_v2 = 238,
252
+ CUPTI_DRIVER_TRACE_CBID_cuGLCtxCreate_v2 = 239,
253
+ CUPTI_DRIVER_TRACE_CBID_cuVDPAUCtxCreate_v2 = 240,
254
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetGlobal_v2 = 241,
255
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetInfo_v2 = 242,
256
+ CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2 = 243,
257
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocPitch_v2 = 244,
258
+ CUPTI_DRIVER_TRACE_CBID_cuMemFree_v2 = 245,
259
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetAddressRange_v2 = 246,
260
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostGetDevicePointer_v2 = 247,
261
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy_v2 = 248,
262
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD8_v2 = 249,
263
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD16_v2 = 250,
264
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD32_v2 = 251,
265
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8_v2 = 252,
266
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16_v2 = 253,
267
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32_v2 = 254,
268
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetAddress_v2 = 255,
269
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetAddress2D_v2 = 256,
270
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetAddress_v2 = 257,
271
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsResourceGetMappedPointer_v2 = 258,
272
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceTotalMem_v2 = 259,
273
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedPointer_v2 = 260,
274
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedSize_v2 = 261,
275
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetMappedPitch_v2 = 262,
276
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10ResourceGetSurfaceDimensions_v2 = 263,
277
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetSurfaceDimensions_v2 = 264,
278
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedPointer_v2 = 265,
279
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedSize_v2 = 266,
280
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9ResourceGetMappedPitch_v2 = 267,
281
+ CUPTI_DRIVER_TRACE_CBID_cuD3D9MapVertexBuffer_v2 = 268,
282
+ CUPTI_DRIVER_TRACE_CBID_cuGLMapBufferObject_v2 = 269,
283
+ CUPTI_DRIVER_TRACE_CBID_cuGLMapBufferObjectAsync_v2 = 270,
284
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostAlloc_v2 = 271,
285
+ CUPTI_DRIVER_TRACE_CBID_cuArrayCreate_v2 = 272,
286
+ CUPTI_DRIVER_TRACE_CBID_cuArrayGetDescriptor_v2 = 273,
287
+ CUPTI_DRIVER_TRACE_CBID_cuArray3DCreate_v2 = 274,
288
+ CUPTI_DRIVER_TRACE_CBID_cuArray3DGetDescriptor_v2 = 275,
289
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2 = 276,
290
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2 = 277,
291
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2 = 278,
292
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2 = 279,
293
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2 = 280,
294
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2 = 281,
295
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoH_v2 = 282,
296
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2 = 283,
297
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoD_v2 = 284,
298
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoA_v2 = 285,
299
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoA_v2 = 286,
300
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2D_v2 = 287,
301
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2 = 288,
302
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2 = 289,
303
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2 = 290,
304
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2 = 291,
305
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2 = 292,
306
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2 = 293,
307
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocHost_v2 = 294,
308
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitEvent = 295,
309
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetApiVersion = 296,
310
+ CUPTI_DRIVER_TRACE_CBID_cuD3D10GetDirect3DDevice = 297,
311
+ CUPTI_DRIVER_TRACE_CBID_cuD3D11GetDirect3DDevice = 298,
312
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetCacheConfig = 299,
313
+ CUPTI_DRIVER_TRACE_CBID_cuCtxSetCacheConfig = 300,
314
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostRegister = 301,
315
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostUnregister = 302,
316
+ CUPTI_DRIVER_TRACE_CBID_cuCtxSetCurrent = 303,
317
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetCurrent = 304,
318
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy = 305,
319
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync = 306,
320
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel = 307,
321
+ CUPTI_DRIVER_TRACE_CBID_cuProfilerStart = 308,
322
+ CUPTI_DRIVER_TRACE_CBID_cuProfilerStop = 309,
323
+ CUPTI_DRIVER_TRACE_CBID_cuPointerGetAttribute = 310,
324
+ CUPTI_DRIVER_TRACE_CBID_cuProfilerInitialize = 311,
325
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceCanAccessPeer = 312,
326
+ CUPTI_DRIVER_TRACE_CBID_cuCtxEnablePeerAccess = 313,
327
+ CUPTI_DRIVER_TRACE_CBID_cuCtxDisablePeerAccess = 314,
328
+ CUPTI_DRIVER_TRACE_CBID_cuMemPeerRegister = 315,
329
+ CUPTI_DRIVER_TRACE_CBID_cuMemPeerUnregister = 316,
330
+ CUPTI_DRIVER_TRACE_CBID_cuMemPeerGetDevicePointer = 317,
331
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer = 318,
332
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync = 319,
333
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DPeer = 320,
334
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DPeerAsync = 321,
335
+ CUPTI_DRIVER_TRACE_CBID_cuCtxDestroy_v2 = 322,
336
+ CUPTI_DRIVER_TRACE_CBID_cuCtxPushCurrent_v2 = 323,
337
+ CUPTI_DRIVER_TRACE_CBID_cuCtxPopCurrent_v2 = 324,
338
+ CUPTI_DRIVER_TRACE_CBID_cuEventDestroy_v2 = 325,
339
+ CUPTI_DRIVER_TRACE_CBID_cuStreamDestroy_v2 = 326,
340
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetAddress2D_v3 = 327,
341
+ CUPTI_DRIVER_TRACE_CBID_cuIpcGetMemHandle = 328,
342
+ CUPTI_DRIVER_TRACE_CBID_cuIpcOpenMemHandle = 329,
343
+ CUPTI_DRIVER_TRACE_CBID_cuIpcCloseMemHandle = 330,
344
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetByPCIBusId = 331,
345
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetPCIBusId = 332,
346
+ CUPTI_DRIVER_TRACE_CBID_cuGLGetDevices = 333,
347
+ CUPTI_DRIVER_TRACE_CBID_cuIpcGetEventHandle = 334,
348
+ CUPTI_DRIVER_TRACE_CBID_cuIpcOpenEventHandle = 335,
349
+ CUPTI_DRIVER_TRACE_CBID_cuCtxSetSharedMemConfig = 336,
350
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetSharedMemConfig = 337,
351
+ CUPTI_DRIVER_TRACE_CBID_cuFuncSetSharedMemConfig = 338,
352
+ CUPTI_DRIVER_TRACE_CBID_cuTexObjectCreate = 339,
353
+ CUPTI_DRIVER_TRACE_CBID_cuTexObjectDestroy = 340,
354
+ CUPTI_DRIVER_TRACE_CBID_cuTexObjectGetResourceDesc = 341,
355
+ CUPTI_DRIVER_TRACE_CBID_cuTexObjectGetTextureDesc = 342,
356
+ CUPTI_DRIVER_TRACE_CBID_cuSurfObjectCreate = 343,
357
+ CUPTI_DRIVER_TRACE_CBID_cuSurfObjectDestroy = 344,
358
+ CUPTI_DRIVER_TRACE_CBID_cuSurfObjectGetResourceDesc = 345,
359
+ CUPTI_DRIVER_TRACE_CBID_cuStreamAddCallback = 346,
360
+ CUPTI_DRIVER_TRACE_CBID_cuMipmappedArrayCreate = 347,
361
+ CUPTI_DRIVER_TRACE_CBID_cuMipmappedArrayGetLevel = 348,
362
+ CUPTI_DRIVER_TRACE_CBID_cuMipmappedArrayDestroy = 349,
363
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetMipmappedArray = 350,
364
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetMipmapFilterMode = 351,
365
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetMipmapLevelBias = 352,
366
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetMipmapLevelClamp = 353,
367
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetMaxAnisotropy = 354,
368
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetMipmappedArray = 355,
369
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetMipmapFilterMode = 356,
370
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetMipmapLevelBias = 357,
371
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetMipmapLevelClamp = 358,
372
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetMaxAnisotropy = 359,
373
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsResourceGetMappedMipmappedArray = 360,
374
+ CUPTI_DRIVER_TRACE_CBID_cuTexObjectGetResourceViewDesc = 361,
375
+ CUPTI_DRIVER_TRACE_CBID_cuLinkCreate = 362,
376
+ CUPTI_DRIVER_TRACE_CBID_cuLinkAddData = 363,
377
+ CUPTI_DRIVER_TRACE_CBID_cuLinkAddFile = 364,
378
+ CUPTI_DRIVER_TRACE_CBID_cuLinkComplete = 365,
379
+ CUPTI_DRIVER_TRACE_CBID_cuLinkDestroy = 366,
380
+ CUPTI_DRIVER_TRACE_CBID_cuStreamCreateWithPriority = 367,
381
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetPriority = 368,
382
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetFlags = 369,
383
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetStreamPriorityRange = 370,
384
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocManaged = 371,
385
+ CUPTI_DRIVER_TRACE_CBID_cuGetErrorString = 372,
386
+ CUPTI_DRIVER_TRACE_CBID_cuGetErrorName = 373,
387
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyMaxActiveBlocksPerMultiprocessor = 374,
388
+ CUPTI_DRIVER_TRACE_CBID_cuCompilePtx = 375,
389
+ CUPTI_DRIVER_TRACE_CBID_cuBinaryFree = 376,
390
+ CUPTI_DRIVER_TRACE_CBID_cuStreamAttachMemAsync = 377,
391
+ CUPTI_DRIVER_TRACE_CBID_cuPointerSetAttribute = 378,
392
+ CUPTI_DRIVER_TRACE_CBID_cuMemHostRegister_v2 = 379,
393
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsResourceSetMapFlags_v2 = 380,
394
+ CUPTI_DRIVER_TRACE_CBID_cuLinkCreate_v2 = 381,
395
+ CUPTI_DRIVER_TRACE_CBID_cuLinkAddData_v2 = 382,
396
+ CUPTI_DRIVER_TRACE_CBID_cuLinkAddFile_v2 = 383,
397
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyMaxPotentialBlockSize = 384,
398
+ CUPTI_DRIVER_TRACE_CBID_cuGLGetDevices_v2 = 385,
399
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxRetain = 386,
400
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxRelease = 387,
401
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxSetFlags = 388,
402
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxReset = 389,
403
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsEGLRegisterImage = 390,
404
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetFlags = 391,
405
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxGetState = 392,
406
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamConsumerConnect = 393,
407
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamConsumerDisconnect = 394,
408
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamConsumerAcquireFrame = 395,
409
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamConsumerReleaseFrame = 396,
410
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2_ptds = 397,
411
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2_ptds = 398,
412
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2_ptds = 399,
413
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoA_v2_ptds = 400,
414
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoD_v2_ptds = 401,
415
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2_ptds = 402,
416
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoH_v2_ptds = 403,
417
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoA_v2_ptds = 404,
418
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2D_v2_ptds = 405,
419
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2_ptds = 406,
420
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2_ptds = 407,
421
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy_ptds = 408,
422
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer_ptds = 409,
423
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DPeer_ptds = 410,
424
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD8_v2_ptds = 411,
425
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD16_v2_ptds = 412,
426
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD32_v2_ptds = 413,
427
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8_v2_ptds = 414,
428
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16_v2_ptds = 415,
429
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32_v2_ptds = 416,
430
+ CUPTI_DRIVER_TRACE_CBID_cuGLMapBufferObject_v2_ptds = 417,
431
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync_ptsz = 418,
432
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2_ptsz = 419,
433
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2_ptsz = 420,
434
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2_ptsz = 421,
435
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2_ptsz = 422,
436
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2_ptsz = 423,
437
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2_ptsz = 424,
438
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2_ptsz = 425,
439
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync_ptsz = 426,
440
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DPeerAsync_ptsz = 427,
441
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD8Async_ptsz = 428,
442
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD16Async_ptsz = 429,
443
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD32Async_ptsz = 430,
444
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8Async_ptsz = 431,
445
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16Async_ptsz = 432,
446
+ CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32Async_ptsz = 433,
447
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetPriority_ptsz = 434,
448
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetFlags_ptsz = 435,
449
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitEvent_ptsz = 436,
450
+ CUPTI_DRIVER_TRACE_CBID_cuStreamAddCallback_ptsz = 437,
451
+ CUPTI_DRIVER_TRACE_CBID_cuStreamAttachMemAsync_ptsz = 438,
452
+ CUPTI_DRIVER_TRACE_CBID_cuStreamQuery_ptsz = 439,
453
+ CUPTI_DRIVER_TRACE_CBID_cuStreamSynchronize_ptsz = 440,
454
+ CUPTI_DRIVER_TRACE_CBID_cuEventRecord_ptsz = 441,
455
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel_ptsz = 442,
456
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsMapResources_ptsz = 443,
457
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsUnmapResources_ptsz = 444,
458
+ CUPTI_DRIVER_TRACE_CBID_cuGLMapBufferObjectAsync_v2_ptsz = 445,
459
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamProducerConnect = 446,
460
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamProducerDisconnect = 447,
461
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamProducerPresentFrame = 448,
462
+ CUPTI_DRIVER_TRACE_CBID_cuGraphicsResourceGetMappedEglFrame = 449,
463
+ CUPTI_DRIVER_TRACE_CBID_cuPointerGetAttributes = 450,
464
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 451,
465
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyMaxPotentialBlockSizeWithFlags = 452,
466
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamProducerReturnFrame = 453,
467
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetP2PAttribute = 454,
468
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefSetBorderColor = 455,
469
+ CUPTI_DRIVER_TRACE_CBID_cuTexRefGetBorderColor = 456,
470
+ CUPTI_DRIVER_TRACE_CBID_cuMemAdvise = 457,
471
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue32 = 458,
472
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue32_ptsz = 459,
473
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue32 = 460,
474
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue32_ptsz = 461,
475
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBatchMemOp = 462,
476
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBatchMemOp_ptsz = 463,
477
+ CUPTI_DRIVER_TRACE_CBID_cuNVNbufferGetPointer = 464,
478
+ CUPTI_DRIVER_TRACE_CBID_cuNVNtextureGetArray = 465,
479
+ CUPTI_DRIVER_TRACE_CBID_cuNNSetAllocator = 466,
480
+ CUPTI_DRIVER_TRACE_CBID_cuMemPrefetchAsync = 467,
481
+ CUPTI_DRIVER_TRACE_CBID_cuMemPrefetchAsync_ptsz = 468,
482
+ CUPTI_DRIVER_TRACE_CBID_cuEventCreateFromNVNSync = 469,
483
+ CUPTI_DRIVER_TRACE_CBID_cuEGLStreamConsumerConnectWithFlags = 470,
484
+ CUPTI_DRIVER_TRACE_CBID_cuMemRangeGetAttribute = 471,
485
+ CUPTI_DRIVER_TRACE_CBID_cuMemRangeGetAttributes = 472,
486
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue64 = 473,
487
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue64_ptsz = 474,
488
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue64 = 475,
489
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue64_ptsz = 476,
490
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel = 477,
491
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel_ptsz = 478,
492
+ CUPTI_DRIVER_TRACE_CBID_cuEventCreateFromEGLSync = 479,
493
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice = 480,
494
+ CUPTI_DRIVER_TRACE_CBID_cuFuncSetAttribute = 481,
495
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetUuid = 482,
496
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCtx = 483,
497
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCtx_ptsz = 484,
498
+ CUPTI_DRIVER_TRACE_CBID_cuImportExternalMemory = 485,
499
+ CUPTI_DRIVER_TRACE_CBID_cuExternalMemoryGetMappedBuffer = 486,
500
+ CUPTI_DRIVER_TRACE_CBID_cuExternalMemoryGetMappedMipmappedArray = 487,
501
+ CUPTI_DRIVER_TRACE_CBID_cuDestroyExternalMemory = 488,
502
+ CUPTI_DRIVER_TRACE_CBID_cuImportExternalSemaphore = 489,
503
+ CUPTI_DRIVER_TRACE_CBID_cuSignalExternalSemaphoresAsync = 490,
504
+ CUPTI_DRIVER_TRACE_CBID_cuSignalExternalSemaphoresAsync_ptsz = 491,
505
+ CUPTI_DRIVER_TRACE_CBID_cuWaitExternalSemaphoresAsync = 492,
506
+ CUPTI_DRIVER_TRACE_CBID_cuWaitExternalSemaphoresAsync_ptsz = 493,
507
+ CUPTI_DRIVER_TRACE_CBID_cuDestroyExternalSemaphore = 494,
508
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBeginCapture = 495,
509
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBeginCapture_ptsz = 496,
510
+ CUPTI_DRIVER_TRACE_CBID_cuStreamEndCapture = 497,
511
+ CUPTI_DRIVER_TRACE_CBID_cuStreamEndCapture_ptsz = 498,
512
+ CUPTI_DRIVER_TRACE_CBID_cuStreamIsCapturing = 499,
513
+ CUPTI_DRIVER_TRACE_CBID_cuStreamIsCapturing_ptsz = 500,
514
+ CUPTI_DRIVER_TRACE_CBID_cuGraphCreate = 501,
515
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddKernelNode = 502,
516
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeGetParams = 503,
517
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddMemcpyNode = 504,
518
+ CUPTI_DRIVER_TRACE_CBID_cuGraphMemcpyNodeGetParams = 505,
519
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddMemsetNode = 506,
520
+ CUPTI_DRIVER_TRACE_CBID_cuGraphMemsetNodeGetParams = 507,
521
+ CUPTI_DRIVER_TRACE_CBID_cuGraphMemsetNodeSetParams = 508,
522
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeGetType = 509,
523
+ CUPTI_DRIVER_TRACE_CBID_cuGraphGetRootNodes = 510,
524
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeGetDependencies = 511,
525
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeGetDependentNodes = 512,
526
+ CUPTI_DRIVER_TRACE_CBID_cuGraphInstantiate = 513,
527
+ CUPTI_DRIVER_TRACE_CBID_cuGraphLaunch = 514,
528
+ CUPTI_DRIVER_TRACE_CBID_cuGraphLaunch_ptsz = 515,
529
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecDestroy = 516,
530
+ CUPTI_DRIVER_TRACE_CBID_cuGraphDestroy = 517,
531
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddDependencies = 518,
532
+ CUPTI_DRIVER_TRACE_CBID_cuGraphRemoveDependencies = 519,
533
+ CUPTI_DRIVER_TRACE_CBID_cuGraphMemcpyNodeSetParams = 520,
534
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeSetParams = 521,
535
+ CUPTI_DRIVER_TRACE_CBID_cuGraphDestroyNode = 522,
536
+ CUPTI_DRIVER_TRACE_CBID_cuGraphClone = 523,
537
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeFindInClone = 524,
538
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddChildGraphNode = 525,
539
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddEmptyNode = 526,
540
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchHostFunc = 527,
541
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchHostFunc_ptsz = 528,
542
+ CUPTI_DRIVER_TRACE_CBID_cuGraphChildGraphNodeGetGraph = 529,
543
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddHostNode = 530,
544
+ CUPTI_DRIVER_TRACE_CBID_cuGraphHostNodeGetParams = 531,
545
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetLuid = 532,
546
+ CUPTI_DRIVER_TRACE_CBID_cuGraphHostNodeSetParams = 533,
547
+ CUPTI_DRIVER_TRACE_CBID_cuGraphGetNodes = 534,
548
+ CUPTI_DRIVER_TRACE_CBID_cuGraphGetEdges = 535,
549
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCaptureInfo = 536,
550
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCaptureInfo_ptsz = 537,
551
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecKernelNodeSetParams = 538,
552
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBeginCapture_v2 = 539,
553
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBeginCapture_v2_ptsz = 540,
554
+ CUPTI_DRIVER_TRACE_CBID_cuThreadExchangeStreamCaptureMode = 541,
555
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetNvSciSyncAttributes = 542,
556
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyAvailableDynamicSMemPerBlock = 543,
557
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxRelease_v2 = 544,
558
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxReset_v2 = 545,
559
+ CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxSetFlags_v2 = 546,
560
+ CUPTI_DRIVER_TRACE_CBID_cuMemAddressReserve = 547,
561
+ CUPTI_DRIVER_TRACE_CBID_cuMemAddressFree = 548,
562
+ CUPTI_DRIVER_TRACE_CBID_cuMemCreate = 549,
563
+ CUPTI_DRIVER_TRACE_CBID_cuMemRelease = 550,
564
+ CUPTI_DRIVER_TRACE_CBID_cuMemMap = 551,
565
+ CUPTI_DRIVER_TRACE_CBID_cuMemUnmap = 552,
566
+ CUPTI_DRIVER_TRACE_CBID_cuMemSetAccess = 553,
567
+ CUPTI_DRIVER_TRACE_CBID_cuMemExportToShareableHandle = 554,
568
+ CUPTI_DRIVER_TRACE_CBID_cuMemImportFromShareableHandle = 555,
569
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetAllocationGranularity = 556,
570
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetAllocationPropertiesFromHandle = 557,
571
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetAccess = 558,
572
+ CUPTI_DRIVER_TRACE_CBID_cuStreamSetFlags = 559,
573
+ CUPTI_DRIVER_TRACE_CBID_cuStreamSetFlags_ptsz = 560,
574
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecUpdate = 561,
575
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecMemcpyNodeSetParams = 562,
576
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecMemsetNodeSetParams = 563,
577
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecHostNodeSetParams = 564,
578
+ CUPTI_DRIVER_TRACE_CBID_cuMemRetainAllocationHandle = 565,
579
+ CUPTI_DRIVER_TRACE_CBID_cuFuncGetModule = 566,
580
+ CUPTI_DRIVER_TRACE_CBID_cuIpcOpenMemHandle_v2 = 567,
581
+ CUPTI_DRIVER_TRACE_CBID_cuCtxResetPersistingL2Cache = 568,
582
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeCopyAttributes = 569,
583
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeGetAttribute = 570,
584
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeSetAttribute = 571,
585
+ CUPTI_DRIVER_TRACE_CBID_cuStreamCopyAttributes = 572,
586
+ CUPTI_DRIVER_TRACE_CBID_cuStreamCopyAttributes_ptsz = 573,
587
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetAttribute = 574,
588
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetAttribute_ptsz = 575,
589
+ CUPTI_DRIVER_TRACE_CBID_cuStreamSetAttribute = 576,
590
+ CUPTI_DRIVER_TRACE_CBID_cuStreamSetAttribute_ptsz = 577,
591
+ CUPTI_DRIVER_TRACE_CBID_cuGraphInstantiate_v2 = 578,
592
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetTexture1DLinearMaxWidth = 579,
593
+ CUPTI_DRIVER_TRACE_CBID_cuGraphUpload = 580,
594
+ CUPTI_DRIVER_TRACE_CBID_cuGraphUpload_ptsz = 581,
595
+ CUPTI_DRIVER_TRACE_CBID_cuArrayGetSparseProperties = 582,
596
+ CUPTI_DRIVER_TRACE_CBID_cuMipmappedArrayGetSparseProperties = 583,
597
+ CUPTI_DRIVER_TRACE_CBID_cuMemMapArrayAsync = 584,
598
+ CUPTI_DRIVER_TRACE_CBID_cuMemMapArrayAsync_ptsz = 585,
599
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecChildGraphNodeSetParams = 586,
600
+ CUPTI_DRIVER_TRACE_CBID_cuEventRecordWithFlags = 587,
601
+ CUPTI_DRIVER_TRACE_CBID_cuEventRecordWithFlags_ptsz = 588,
602
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddEventRecordNode = 589,
603
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddEventWaitNode = 590,
604
+ CUPTI_DRIVER_TRACE_CBID_cuGraphEventRecordNodeGetEvent = 591,
605
+ CUPTI_DRIVER_TRACE_CBID_cuGraphEventWaitNodeGetEvent = 592,
606
+ CUPTI_DRIVER_TRACE_CBID_cuGraphEventRecordNodeSetEvent = 593,
607
+ CUPTI_DRIVER_TRACE_CBID_cuGraphEventWaitNodeSetEvent = 594,
608
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecEventRecordNodeSetEvent = 595,
609
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecEventWaitNodeSetEvent = 596,
610
+ CUPTI_DRIVER_TRACE_CBID_cuArrayGetPlane = 597,
611
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocAsync = 598,
612
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocAsync_ptsz = 599,
613
+ CUPTI_DRIVER_TRACE_CBID_cuMemFreeAsync = 600,
614
+ CUPTI_DRIVER_TRACE_CBID_cuMemFreeAsync_ptsz = 601,
615
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolTrimTo = 602,
616
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolSetAttribute = 603,
617
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolGetAttribute = 604,
618
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolSetAccess = 605,
619
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetDefaultMemPool = 606,
620
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolCreate = 607,
621
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolDestroy = 608,
622
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceSetMemPool = 609,
623
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetMemPool = 610,
624
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocFromPoolAsync = 611,
625
+ CUPTI_DRIVER_TRACE_CBID_cuMemAllocFromPoolAsync_ptsz = 612,
626
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolExportToShareableHandle = 613,
627
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolImportFromShareableHandle = 614,
628
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolExportPointer = 615,
629
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolImportPointer = 616,
630
+ CUPTI_DRIVER_TRACE_CBID_cuMemPoolGetAccess = 617,
631
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddExternalSemaphoresSignalNode = 618,
632
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExternalSemaphoresSignalNodeGetParams = 619,
633
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExternalSemaphoresSignalNodeSetParams = 620,
634
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddExternalSemaphoresWaitNode = 621,
635
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExternalSemaphoresWaitNodeGetParams = 622,
636
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExternalSemaphoresWaitNodeSetParams = 623,
637
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecExternalSemaphoresSignalNodeSetParams = 624,
638
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecExternalSemaphoresWaitNodeSetParams = 625,
639
+ CUPTI_DRIVER_TRACE_CBID_cuGetProcAddress = 626,
640
+ CUPTI_DRIVER_TRACE_CBID_cuFlushGPUDirectRDMAWrites = 627,
641
+ CUPTI_DRIVER_TRACE_CBID_cuGraphDebugDotPrint = 628,
642
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCaptureInfo_v2 = 629,
643
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCaptureInfo_v2_ptsz = 630,
644
+ CUPTI_DRIVER_TRACE_CBID_cuStreamUpdateCaptureDependencies = 631,
645
+ CUPTI_DRIVER_TRACE_CBID_cuStreamUpdateCaptureDependencies_ptsz = 632,
646
+ CUPTI_DRIVER_TRACE_CBID_cuUserObjectCreate = 633,
647
+ CUPTI_DRIVER_TRACE_CBID_cuUserObjectRetain = 634,
648
+ CUPTI_DRIVER_TRACE_CBID_cuUserObjectRelease = 635,
649
+ CUPTI_DRIVER_TRACE_CBID_cuGraphRetainUserObject = 636,
650
+ CUPTI_DRIVER_TRACE_CBID_cuGraphReleaseUserObject = 637,
651
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddMemAllocNode = 638,
652
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddMemFreeNode = 639,
653
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGraphMemTrim = 640,
654
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetGraphMemAttribute = 641,
655
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceSetGraphMemAttribute = 642,
656
+ CUPTI_DRIVER_TRACE_CBID_cuGraphInstantiateWithFlags = 643,
657
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetExecAffinitySupport = 644,
658
+ CUPTI_DRIVER_TRACE_CBID_cuCtxCreate_v3 = 645,
659
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetExecAffinity = 646,
660
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetUuid_v2 = 647,
661
+ CUPTI_DRIVER_TRACE_CBID_cuGraphMemAllocNodeGetParams = 648,
662
+ CUPTI_DRIVER_TRACE_CBID_cuGraphMemFreeNodeGetParams = 649,
663
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeSetEnabled = 650,
664
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeGetEnabled = 651,
665
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchKernelEx = 652,
666
+ CUPTI_DRIVER_TRACE_CBID_cuLaunchKernelEx_ptsz = 653,
667
+ CUPTI_DRIVER_TRACE_CBID_cuArrayGetMemoryRequirements = 654,
668
+ CUPTI_DRIVER_TRACE_CBID_cuMipmappedArrayGetMemoryRequirements = 655,
669
+ CUPTI_DRIVER_TRACE_CBID_cuGraphInstantiateWithParams = 656,
670
+ CUPTI_DRIVER_TRACE_CBID_cuGraphInstantiateWithParams_ptsz = 657,
671
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecGetFlags = 658,
672
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue32_v2 = 659,
673
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue32_v2_ptsz = 660,
674
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue64_v2 = 661,
675
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWaitValue64_v2_ptsz = 662,
676
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue32_v2 = 663,
677
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue32_v2_ptsz = 664,
678
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue64_v2 = 665,
679
+ CUPTI_DRIVER_TRACE_CBID_cuStreamWriteValue64_v2_ptsz = 666,
680
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBatchMemOp_v2 = 667,
681
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBatchMemOp_v2_ptsz = 668,
682
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddBatchMemOpNode = 669,
683
+ CUPTI_DRIVER_TRACE_CBID_cuGraphBatchMemOpNodeGetParams = 670,
684
+ CUPTI_DRIVER_TRACE_CBID_cuGraphBatchMemOpNodeSetParams = 671,
685
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecBatchMemOpNodeSetParams = 672,
686
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetLoadingMode = 673,
687
+ CUPTI_DRIVER_TRACE_CBID_cuMemGetHandleForAddressRange = 674,
688
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyMaxPotentialClusterSize = 675,
689
+ CUPTI_DRIVER_TRACE_CBID_cuOccupancyMaxActiveClusters = 676,
690
+ CUPTI_DRIVER_TRACE_CBID_cuGetProcAddress_v2 = 677,
691
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryLoadData = 678,
692
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryLoadFromFile = 679,
693
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryUnload = 680,
694
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryGetKernel = 681,
695
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryGetModule = 682,
696
+ CUPTI_DRIVER_TRACE_CBID_cuKernelGetFunction = 683,
697
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryGetGlobal = 684,
698
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryGetManaged = 685,
699
+ CUPTI_DRIVER_TRACE_CBID_cuKernelGetAttribute = 686,
700
+ CUPTI_DRIVER_TRACE_CBID_cuKernelSetAttribute = 687,
701
+ CUPTI_DRIVER_TRACE_CBID_cuKernelSetCacheConfig = 688,
702
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddKernelNode_v2 = 689,
703
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeGetParams_v2 = 690,
704
+ CUPTI_DRIVER_TRACE_CBID_cuGraphKernelNodeSetParams_v2 = 691,
705
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecKernelNodeSetParams_v2 = 692,
706
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetId = 693,
707
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetId_ptsz = 694,
708
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetId = 695,
709
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecUpdate_v2 = 696,
710
+ CUPTI_DRIVER_TRACE_CBID_cuTensorMapEncodeTiled = 697,
711
+ CUPTI_DRIVER_TRACE_CBID_cuTensorMapEncodeIm2col = 698,
712
+ CUPTI_DRIVER_TRACE_CBID_cuTensorMapReplaceAddress = 699,
713
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryGetUnifiedFunction = 700,
714
+ CUPTI_DRIVER_TRACE_CBID_cuCoredumpGetAttribute = 701,
715
+ CUPTI_DRIVER_TRACE_CBID_cuCoredumpGetAttributeGlobal = 702,
716
+ CUPTI_DRIVER_TRACE_CBID_cuCoredumpSetAttribute = 703,
717
+ CUPTI_DRIVER_TRACE_CBID_cuCoredumpSetAttributeGlobal = 704,
718
+ CUPTI_DRIVER_TRACE_CBID_cuCtxSetFlags = 705,
719
+ CUPTI_DRIVER_TRACE_CBID_cuMulticastCreate = 706,
720
+ CUPTI_DRIVER_TRACE_CBID_cuMulticastAddDevice = 707,
721
+ CUPTI_DRIVER_TRACE_CBID_cuMulticastBindMem = 708,
722
+ CUPTI_DRIVER_TRACE_CBID_cuMulticastBindAddr = 709,
723
+ CUPTI_DRIVER_TRACE_CBID_cuMulticastUnbind = 710,
724
+ CUPTI_DRIVER_TRACE_CBID_cuMulticastGetGranularity = 711,
725
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddNode = 712,
726
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeSetParams = 713,
727
+ CUPTI_DRIVER_TRACE_CBID_cuGraphExecNodeSetParams = 714,
728
+ CUPTI_DRIVER_TRACE_CBID_cuMemAdvise_v2 = 715,
729
+ CUPTI_DRIVER_TRACE_CBID_cuMemPrefetchAsync_v2 = 716,
730
+ CUPTI_DRIVER_TRACE_CBID_cuMemPrefetchAsync_v2_ptsz = 717,
731
+ CUPTI_DRIVER_TRACE_CBID_cuFuncGetName = 718,
732
+ CUPTI_DRIVER_TRACE_CBID_cuKernelGetName = 719,
733
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBeginCaptureToGraph = 720,
734
+ CUPTI_DRIVER_TRACE_CBID_cuStreamBeginCaptureToGraph_ptsz = 721,
735
+ CUPTI_DRIVER_TRACE_CBID_cuGraphConditionalHandleCreate = 722,
736
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddNode_v2 = 723,
737
+ CUPTI_DRIVER_TRACE_CBID_cuGraphGetEdges_v2 = 724,
738
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeGetDependencies_v2 = 725,
739
+ CUPTI_DRIVER_TRACE_CBID_cuGraphNodeGetDependentNodes_v2 = 726,
740
+ CUPTI_DRIVER_TRACE_CBID_cuGraphAddDependencies_v2 = 727,
741
+ CUPTI_DRIVER_TRACE_CBID_cuGraphRemoveDependencies_v2 = 728,
742
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCaptureInfo_v3 = 729,
743
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCaptureInfo_v3_ptsz = 730,
744
+ CUPTI_DRIVER_TRACE_CBID_cuStreamUpdateCaptureDependencies_v2 = 731,
745
+ CUPTI_DRIVER_TRACE_CBID_cuStreamUpdateCaptureDependencies_v2_ptsz = 732,
746
+ CUPTI_DRIVER_TRACE_CBID_cuFuncGetParamInfo = 733,
747
+ CUPTI_DRIVER_TRACE_CBID_cuKernelGetParamInfo = 734,
748
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceRegisterAsyncNotification = 735,
749
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceUnregisterAsyncNotification = 736,
750
+ CUPTI_DRIVER_TRACE_CBID_cuModuleGetFunctionCount = 737,
751
+ CUPTI_DRIVER_TRACE_CBID_cuModuleEnumerateFunctions = 738,
752
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryGetKernelCount = 739,
753
+ CUPTI_DRIVER_TRACE_CBID_cuLibraryEnumerateKernels = 740,
754
+ CUPTI_DRIVER_TRACE_CBID_cuFuncIsLoaded = 741,
755
+ CUPTI_DRIVER_TRACE_CBID_cuFuncLoad = 742,
756
+ CUPTI_DRIVER_TRACE_CBID_cuGreenCtxCreate = 743,
757
+ CUPTI_DRIVER_TRACE_CBID_cuGreenCtxDestroy = 744,
758
+ CUPTI_DRIVER_TRACE_CBID_cuDeviceGetDevResource = 745,
759
+ CUPTI_DRIVER_TRACE_CBID_cuCtxGetDevResource = 746,
760
+ CUPTI_DRIVER_TRACE_CBID_cuGreenCtxGetDevResource = 747,
761
+ CUPTI_DRIVER_TRACE_CBID_cuDevResourceGenerateDesc = 748,
762
+ CUPTI_DRIVER_TRACE_CBID_cuGreenCtxRecordEvent = 749,
763
+ CUPTI_DRIVER_TRACE_CBID_cuGreenCtxWaitEvent = 750,
764
+ CUPTI_DRIVER_TRACE_CBID_cuDevSmResourceSplitByCount = 751,
765
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetGreenCtx = 752,
766
+ CUPTI_DRIVER_TRACE_CBID_cuCtxFromGreenCtx = 753,
767
+ CUPTI_DRIVER_TRACE_CBID_cuKernelGetLibrary = 754,
768
+ CUPTI_DRIVER_TRACE_CBID_cuCtxRecordEvent = 755,
769
+ CUPTI_DRIVER_TRACE_CBID_cuCtxWaitEvent = 756,
770
+ CUPTI_DRIVER_TRACE_CBID_cuCtxCreate_v4 = 757,
771
+ CUPTI_DRIVER_TRACE_CBID_cuGreenCtxStreamCreate = 758,
772
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCtx_v2 = 759,
773
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetCtx_v2_ptsz = 760,
774
+ CUPTI_DRIVER_TRACE_CBID_cuMemBatchDecompressAsync = 761,
775
+ CUPTI_DRIVER_TRACE_CBID_cuMemBatchDecompressAsync_ptsz = 762,
776
+ CUPTI_DRIVER_TRACE_CBID_cuLogsRegisterCallback = 763,
777
+ CUPTI_DRIVER_TRACE_CBID_cuLogsUnregisterCallback = 764,
778
+ CUPTI_DRIVER_TRACE_CBID_cuLogsCurrent = 765,
779
+ CUPTI_DRIVER_TRACE_CBID_cuLogsDumpToFile = 766,
780
+ CUPTI_DRIVER_TRACE_CBID_cuLogsDumpToMemory = 767,
781
+ CUPTI_DRIVER_TRACE_CBID_cuCheckpointProcessGetRestoreThreadId = 768,
782
+ CUPTI_DRIVER_TRACE_CBID_cuCheckpointProcessGetState = 769,
783
+ CUPTI_DRIVER_TRACE_CBID_cuCheckpointProcessLock = 770,
784
+ CUPTI_DRIVER_TRACE_CBID_cuCheckpointProcessCheckpoint = 771,
785
+ CUPTI_DRIVER_TRACE_CBID_cuCheckpointProcessRestore = 772,
786
+ CUPTI_DRIVER_TRACE_CBID_cuCheckpointProcessUnlock = 773,
787
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetDevice = 774,
788
+ CUPTI_DRIVER_TRACE_CBID_cuStreamGetDevice_ptsz = 775,
789
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyBatchAsync = 776,
790
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpyBatchAsync_ptsz = 777,
791
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DBatchAsync = 778,
792
+ CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DBatchAsync_ptsz = 779,
793
+ CUPTI_DRIVER_TRACE_CBID_cuEventElapsedTime_v2 = 780,
794
+ CUPTI_DRIVER_TRACE_CBID_cuTensorMapEncodeIm2colWide = 781,
795
+ CUPTI_DRIVER_TRACE_CBID_SIZE = 782,
796
+ CUPTI_DRIVER_TRACE_CBID_FORCE_INT = 0x7fffffff
797
+ } CUpti_driver_api_trace_cbid;
798
+
799
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_events.h ADDED
@@ -0,0 +1,1349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_EVENTS_H_)
51
+ #define _CUPTI_EVENTS_H_
52
+
53
+ #include <cuda.h>
54
+ #include <string.h>
55
+ #include <cuda_stdint.h>
56
+ #include <cupti_result.h>
57
+
58
+ #ifndef CUPTIAPI
59
+ #ifdef _WIN32
60
+ #define CUPTIAPI __stdcall
61
+ #else
62
+ #define CUPTIAPI
63
+ #endif
64
+ #endif
65
+
66
+ #if defined(__cplusplus)
67
+ extern "C" {
68
+ #endif
69
+
70
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
71
+ #pragma GCC visibility push(default)
72
+ #endif
73
+
74
+ /**
75
+ * \defgroup CUPTI_EVENT_API CUPTI Event API
76
+ * Functions, types, and enums that implement the CUPTI Event API.
77
+ *
78
+ * \note The CUPTI event API from the header cupti_events.h is not supported on devices
79
+ * with compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
80
+ * This API is deprecated in CUDA 12.8 release and will be removed in a future CUDA release.
81
+ * This is replaced by the host profiling API in the header cupti_profiler_host.h and
82
+ * target profiling API in the header cupti_range_profiler.h which are supported on
83
+ * devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures).
84
+ *
85
+ * @{
86
+ */
87
+
88
+ /**
89
+ * \brief ID for an event.
90
+ *
91
+ * An event represents a countable activity, action, or occurrence on
92
+ * the device.
93
+ */
94
+ typedef uint32_t CUpti_EventID;
95
+
96
+ /**
97
+ * \brief ID for an event domain.
98
+ *
99
+ * ID for an event domain. An event domain represents a group of
100
+ * related events. A device may have multiple instances of a domain,
101
+ * indicating that the device can simultaneously record multiple
102
+ * instances of each event within that domain.
103
+ */
104
+ typedef uint32_t CUpti_EventDomainID;
105
+
106
+ /**
107
+ * \brief A group of events.
108
+ *
109
+ * An event group is a collection of events that are managed
110
+ * together. All events in an event group must belong to the same
111
+ * domain.
112
+ */
113
+ typedef void *CUpti_EventGroup;
114
+
115
+ /**
116
+ * \brief Device class.
117
+ *
118
+ * Enumeration of device classes for device attribute
119
+ * CUPTI_DEVICE_ATTR_DEVICE_CLASS.
120
+ */
121
+ typedef enum {
122
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_TESLA = 0,
123
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_QUADRO = 1,
124
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_GEFORCE = 2,
125
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_TEGRA = 3,
126
+ } CUpti_DeviceAttributeDeviceClass;
127
+
128
+ /**
129
+ * \brief Device attributes.
130
+ *
131
+ * CUPTI device attributes. These attributes can be read using \ref
132
+ * cuptiDeviceGetAttribute.
133
+ */
134
+ typedef enum {
135
+ /**
136
+ * Number of event IDs for a device. Value is a uint32_t.
137
+ */
138
+ CUPTI_DEVICE_ATTR_MAX_EVENT_ID = 1,
139
+ /**
140
+ * Number of event domain IDs for a device. Value is a uint32_t.
141
+ */
142
+ CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID = 2,
143
+ /**
144
+ * Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
145
+ */
146
+ CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH = 3,
147
+ /**
148
+ * Get theoretical maximum number of instructions per cycle. Value
149
+ * is a uint32_t.
150
+ */
151
+ CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE = 4,
152
+ /**
153
+ * Get theoretical maximum number of single precision instructions
154
+ * that can be executed per second. Value is a uint64_t.
155
+ */
156
+ CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5,
157
+ /**
158
+ * Get number of frame buffers for device. Value is a uint64_t.
159
+ */
160
+ CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS = 6,
161
+ /**
162
+ * Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type
163
+ * is non-PCIE. Value is a uint64_t.
164
+ */
165
+ CUPTI_DEVICE_ATTR_PCIE_LINK_RATE = 7,
166
+ /**
167
+ * Get PCIE link width for device. Return 0 if bus-type
168
+ * is non-PCIE. Value is a uint64_t.
169
+ */
170
+ CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH = 8,
171
+ /**
172
+ * Get PCIE generation for device. Return 0 if bus-type
173
+ * is non-PCIE. Value is a uint64_t.
174
+ */
175
+ CUPTI_DEVICE_ATTR_PCIE_GEN = 9,
176
+ /**
177
+ * Get the class for the device. Value is a
178
+ * CUpti_DeviceAttributeDeviceClass.
179
+ */
180
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS = 10,
181
+ /**
182
+ * Get the peak single precision flop per cycle. Value is a uint64_t.
183
+ */
184
+ CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE = 11,
185
+ /**
186
+ * Get the peak double precision flop per cycle. Value is a uint64_t.
187
+ */
188
+ CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE = 12,
189
+ /**
190
+ * Get number of L2 units. Value is a uint64_t.
191
+ */
192
+ CUPTI_DEVICE_ATTR_MAX_L2_UNITS = 13,
193
+ /**
194
+ * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED
195
+ * preference. Value is a uint64_t.
196
+ */
197
+ CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14,
198
+ /**
199
+ * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1
200
+ * preference. Value is a uint64_t.
201
+ */
202
+ CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15,
203
+ /**
204
+ * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL
205
+ * preference. Value is a uint64_t.
206
+ */
207
+ CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16,
208
+ /**
209
+ * Get the peak half precision flop per cycle. Value is a uint64_t.
210
+ */
211
+ CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE = 17,
212
+ /**
213
+ * Check if Nvlink is connected to device. Returns 1, if at least one
214
+ * Nvlink is connected to the device, returns 0 otherwise.
215
+ * Value is a uint32_t.
216
+ */
217
+ CUPTI_DEVICE_ATTR_NVLINK_PRESENT = 18,
218
+ /**
219
+ * Check if Nvlink is present between GPU and CPU. Returns Bandwidth,
220
+ * in Bytes/sec, if Nvlink is present, returns 0 otherwise.
221
+ * Value is a uint64_t.
222
+ */
223
+ CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW = 19,
224
+ /**
225
+ * Check if NVSwitch is present in the underlying topology.
226
+ * Returns 1, if present, returns 0 otherwise.
227
+ * Value is a uint32_t.
228
+ */
229
+ CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT = 20,
230
+ CUPTI_DEVICE_ATTR_FORCE_INT = 0x7fffffff,
231
+ } CUpti_DeviceAttribute;
232
+
233
+ /**
234
+ * \brief Event domain attributes.
235
+ *
236
+ * Event domain attributes. Except where noted, all the attributes can
237
+ * be read using either \ref cuptiDeviceGetEventDomainAttribute or
238
+ * \ref cuptiEventDomainGetAttribute.
239
+ */
240
+ typedef enum {
241
+ /**
242
+ * Event domain name. Value is a null terminated const c-string.
243
+ */
244
+ CUPTI_EVENT_DOMAIN_ATTR_NAME = 0,
245
+ /**
246
+ * Number of instances of the domain for which event counts will be
247
+ * collected. The domain may have additional instances that cannot
248
+ * be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT).
249
+ * Can be read only with \ref
250
+ * cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
251
+ */
252
+ CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT = 1,
253
+ /**
254
+ * Total number of instances of the domain, including instances that
255
+ * cannot be profiled. Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT
256
+ * to get the number of instances that can be profiled. Can be read
257
+ * only with \ref cuptiDeviceGetEventDomainAttribute. Value is a
258
+ * uint32_t.
259
+ */
260
+ CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3,
261
+ /**
262
+ * Collection method used for events contained in the event domain.
263
+ * Value is a \ref CUpti_EventCollectionMethod.
264
+ */
265
+ CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD = 4,
266
+
267
+ CUPTI_EVENT_DOMAIN_ATTR_FORCE_INT = 0x7fffffff,
268
+ } CUpti_EventDomainAttribute;
269
+
270
+ /**
271
+ * \brief The collection method used for an event.
272
+ *
273
+ * The collection method indicates how an event is collected.
274
+ */
275
+ typedef enum {
276
+ /**
277
+ * Event is collected using a hardware global performance monitor.
278
+ */
279
+ CUPTI_EVENT_COLLECTION_METHOD_PM = 0,
280
+ /**
281
+ * Event is collected using a hardware SM performance monitor.
282
+ */
283
+ CUPTI_EVENT_COLLECTION_METHOD_SM = 1,
284
+ /**
285
+ * Event is collected using software instrumentation.
286
+ */
287
+ CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED = 2,
288
+ /**
289
+ * Event is collected using NvLink throughput counter method.
290
+ */
291
+ CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC = 3,
292
+ CUPTI_EVENT_COLLECTION_METHOD_FORCE_INT = 0x7fffffff
293
+ } CUpti_EventCollectionMethod;
294
+
295
+ /**
296
+ * \brief Event group attributes.
297
+ *
298
+ * Event group attributes. These attributes can be read using \ref
299
+ * cuptiEventGroupGetAttribute. Attributes marked [rw] can also be
300
+ * written using \ref cuptiEventGroupSetAttribute.
301
+ */
302
+ typedef enum {
303
+ /**
304
+ * The domain to which the event group is bound. This attribute is
305
+ * set when the first event is added to the group. Value is a
306
+ * CUpti_EventDomainID.
307
+ */
308
+ CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID = 0,
309
+ /**
310
+ * [rw] Profile all the instances of the domain for this
311
+ * eventgroup. This feature can be used to get load balancing
312
+ * across all instances of a domain. Value is an integer.
313
+ */
314
+ CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1,
315
+ /**
316
+ * [rw] Reserved for user data.
317
+ */
318
+ CUPTI_EVENT_GROUP_ATTR_USER_DATA = 2,
319
+ /**
320
+ * Number of events in the group. Value is a uint32_t.
321
+ */
322
+ CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS = 3,
323
+ /**
324
+ * Enumerates events in the group. Value is a pointer to buffer of
325
+ * size sizeof(CUpti_EventID) * num_of_events in the eventgroup.
326
+ * num_of_events can be queried using
327
+ * CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS.
328
+ */
329
+ CUPTI_EVENT_GROUP_ATTR_EVENTS = 4,
330
+ /**
331
+ * Number of instances of the domain bound to this event group that
332
+ * will be counted. Value is a uint32_t.
333
+ */
334
+ CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT = 5,
335
+ /**
336
+ * Event group scope can be set to CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
337
+ * CUPTI_EVENT_PROFILING_SCOPE_CONTEXT for an eventGroup, before
338
+ * adding any event.
339
+ * Sets the scope of eventgroup as CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
340
+ * CUPTI_EVENT_PROFILING_SCOPE_CONTEXT when the scope of the events
341
+ * that will be added is CUPTI_EVENT_PROFILING_SCOPE_BOTH.
342
+ * If profiling scope of event is either
343
+ * CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT
344
+ * then setting this attribute will not affect the default scope.
345
+ * It is not allowed to add events of different scope to same eventgroup.
346
+ * Value is a uint32_t.
347
+ */
348
+ CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE = 6,
349
+ CUPTI_EVENT_GROUP_ATTR_FORCE_INT = 0x7fffffff,
350
+ } CUpti_EventGroupAttribute;
351
+
352
+ /**
353
+ * \brief Profiling scope for event.
354
+ *
355
+ * Profiling scope of event indicates if the event can be collected at context
356
+ * scope or device scope or both i.e. it can be collected at any of context or
357
+ * device scope.
358
+ */
359
+ typedef enum {
360
+ /**
361
+ * Event is collected at context scope.
362
+ */
363
+ CUPTI_EVENT_PROFILING_SCOPE_CONTEXT = 0,
364
+ /**
365
+ * Event is collected at device scope.
366
+ */
367
+ CUPTI_EVENT_PROFILING_SCOPE_DEVICE = 1,
368
+ /**
369
+ * Event can be collected at device or context scope.
370
+ * The scope can be set using \ref cuptiEventGroupSetAttribute API.
371
+ */
372
+ CUPTI_EVENT_PROFILING_SCOPE_BOTH = 2,
373
+ CUPTI_EVENT_PROFILING_SCOPE_FORCE_INT = 0x7fffffff
374
+ } CUpti_EventProfilingScope;
375
+
376
+ /**
377
+ * \brief Event attributes.
378
+ *
379
+ * Event attributes. These attributes can be read using \ref
380
+ * cuptiEventGetAttribute.
381
+ */
382
+ typedef enum {
383
+ /**
384
+ * Event name. Value is a null terminated const c-string.
385
+ */
386
+ CUPTI_EVENT_ATTR_NAME = 0,
387
+ /**
388
+ * Short description of event. Value is a null terminated const
389
+ * c-string.
390
+ */
391
+ CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1,
392
+ /**
393
+ * Long description of event. Value is a null terminated const
394
+ * c-string.
395
+ */
396
+ CUPTI_EVENT_ATTR_LONG_DESCRIPTION = 2,
397
+ /**
398
+ * Category of event. Value is CUpti_EventCategory.
399
+ */
400
+ CUPTI_EVENT_ATTR_CATEGORY = 3,
401
+ /**
402
+ * Profiling scope of the events. It can be either device or context or both.
403
+ * Value is a \ref CUpti_EventProfilingScope.
404
+ */
405
+ CUPTI_EVENT_ATTR_PROFILING_SCOPE = 5,
406
+
407
+ CUPTI_EVENT_ATTR_FORCE_INT = 0x7fffffff,
408
+ } CUpti_EventAttribute;
409
+
410
+ /**
411
+ * \brief Event collection modes.
412
+ *
413
+ * The event collection mode determines the period over which the
414
+ * events within the enabled event groups will be collected.
415
+ */
416
+ typedef enum {
417
+ /**
418
+ * Events are collected for the entire duration between the
419
+ * cuptiEventGroupEnable and cuptiEventGroupDisable calls.
420
+ * Event values are reset when the events are read.
421
+ * For CUDA toolkit v6.0 and older this was the default mode.
422
+ */
423
+ CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS = 0,
424
+ /**
425
+ * Events are collected only for the durations of kernel executions
426
+ * that occur between the cuptiEventGroupEnable and
427
+ * cuptiEventGroupDisable calls. Event collection begins when a
428
+ * kernel execution begins, and stops when kernel execution
429
+ * completes. Event values are reset to zero when each kernel
430
+ * execution begins. If multiple kernel executions occur between the
431
+ * cuptiEventGroupEnable and cuptiEventGroupDisable calls then the
432
+ * event values must be read after each kernel launch if those
433
+ * events need to be associated with the specific kernel launch.
434
+ * Note that collection in this mode may significantly change the
435
+ * overall performance characteristics of the application because
436
+ * kernel executions that occur between the cuptiEventGroupEnable and
437
+ * cuptiEventGroupDisable calls are serialized on the GPU.
438
+ * This is the default mode from CUDA toolkit v6.5
439
+ */
440
+ CUPTI_EVENT_COLLECTION_MODE_KERNEL = 1,
441
+ CUPTI_EVENT_COLLECTION_MODE_FORCE_INT = 0x7fffffff
442
+ } CUpti_EventCollectionMode;
443
+
444
+ /**
445
+ * \brief An event category.
446
+ *
447
+ * Each event is assigned to a category that represents the general
448
+ * type of the event. An event's category is accessed using \ref
449
+ * cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute.
450
+ */
451
+ typedef enum {
452
+ /**
453
+ * An instruction related event.
454
+ */
455
+ CUPTI_EVENT_CATEGORY_INSTRUCTION = 0,
456
+ /**
457
+ * A memory related event.
458
+ */
459
+ CUPTI_EVENT_CATEGORY_MEMORY = 1,
460
+ /**
461
+ * A cache related event.
462
+ */
463
+ CUPTI_EVENT_CATEGORY_CACHE = 2,
464
+ /**
465
+ * A profile-trigger event.
466
+ */
467
+ CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3,
468
+ /**
469
+ * A system event.
470
+ */
471
+ CUPTI_EVENT_CATEGORY_SYSTEM = 4,
472
+ CUPTI_EVENT_CATEGORY_FORCE_INT = 0x7fffffff
473
+ } CUpti_EventCategory;
474
+
475
+ /**
476
+ * \brief The overflow value for a CUPTI event.
477
+ *
478
+ * The CUPTI event value that indicates an overflow.
479
+ */
480
+ #define CUPTI_EVENT_OVERFLOW ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
481
+
482
+ /**
483
+ * \brief The value that indicates the event value is invalid
484
+ */
485
+ #define CUPTI_EVENT_INVALID ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
486
+
487
+ /**
488
+ * \brief Flags for cuptiEventGroupReadEvent and
489
+ * cuptiEventGroupReadAllEvents.
490
+ *
491
+ * Flags for \ref cuptiEventGroupReadEvent and \ref
492
+ * cuptiEventGroupReadAllEvents.
493
+ */
494
+ typedef enum {
495
+ /**
496
+ * No flags.
497
+ */
498
+ CUPTI_EVENT_READ_FLAG_NONE = 0,
499
+ CUPTI_EVENT_READ_FLAG_FORCE_INT = 0x7fffffff,
500
+ } CUpti_ReadEventFlags;
501
+
502
+
503
+ /**
504
+ * \brief A set of event groups.
505
+ *
506
+ * A set of event groups. When returned by \ref
507
+ * cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
508
+ * a set indicates the event groups that can be enabled at the same
509
+ * time (i.e. all the events in the set can be collected
510
+ * simultaneously).
511
+ */
512
+ typedef struct {
513
+ /**
514
+ * The number of event groups in the set.
515
+ */
516
+ uint32_t numEventGroups;
517
+ /**
518
+ * An array of \p numEventGroups event groups.
519
+ */
520
+ CUpti_EventGroup *eventGroups;
521
+ } CUpti_EventGroupSet;
522
+
523
+ /**
524
+ * \brief A set of event group sets.
525
+ *
526
+ * A set of event group sets. When returned by \ref
527
+ * cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
528
+ * a CUpti_EventGroupSets indicates the number of passes required to
529
+ * collect all the events, and the event groups that should be
530
+ * collected during each pass.
531
+ */
532
+ typedef struct {
533
+ /**
534
+ * Number of event group sets.
535
+ */
536
+ uint32_t numSets;
537
+ /**
538
+ * An array of \p numSets event group sets.
539
+ */
540
+ CUpti_EventGroupSet *sets;
541
+ } CUpti_EventGroupSets;
542
+
543
+ /**
544
+ * \brief Set the event collection mode.
545
+ *
546
+ * Set the event collection mode for a \p context. The \p mode
547
+ * controls the event collection behavior of all events in event
548
+ * groups created in the \p context. This API is invalid in kernel
549
+ * replay mode.
550
+ * \note \b Thread-safety: this function is thread safe.
551
+ *
552
+ * \param context The context
553
+ * \param mode The event collection mode
554
+ *
555
+ * \retval CUPTI_SUCCESS
556
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
557
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
558
+ * \retval CUPTI_ERROR_INVALID_OPERATION if called when replay mode is enabled
559
+ * \retval CUPTI_ERROR_NOT_SUPPORTED if mode is not supported on the device
560
+ */
561
+
562
+ CUptiResult CUPTIAPI cuptiSetEventCollectionMode(CUcontext context,
563
+ CUpti_EventCollectionMode mode);
564
+
565
+ /**
566
+ * \brief Read a device attribute.
567
+ *
568
+ * Read a device attribute and return it in \p *value.
569
+ * \note \b Thread-safety: this function is thread safe.
570
+ *
571
+ * \param device The CUDA device
572
+ * \param attrib The attribute to read
573
+ * \param valueSize Size of the buffer pointed to by \p value, and
574
+ * returns the number of bytes written to \p value
575
+ * \param value Returns the value of the attribute
576
+ *
577
+ * \retval CUPTI_SUCCESS
578
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
579
+ * \retval CUPTI_ERROR_INVALID_DEVICE
580
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
581
+ * is NULL, or if \p attrib is not a device attribute
582
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
583
+ * attribute values, indicates that the \p value buffer is too small
584
+ * to hold the attribute value.
585
+ */
586
+ CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device,
587
+ CUpti_DeviceAttribute attrib,
588
+ size_t *valueSize,
589
+ void *value);
590
+
591
+ /**
592
+ * \brief Get the number of domains for a device.
593
+ *
594
+ * Returns the number of domains in \p numDomains for a device.
595
+ * \note \b Thread-safety: this function is thread safe.
596
+ *
597
+ * \param device The CUDA device
598
+ * \param numDomains Returns the number of domains
599
+ *
600
+ * \retval CUPTI_SUCCESS
601
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
602
+ * \retval CUPTI_ERROR_INVALID_DEVICE
603
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
604
+ */
605
+ CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device,
606
+ uint32_t *numDomains);
607
+
608
+ /**
609
+ * \brief Get the event domains for a device.
610
+ *
611
+ * Returns the event domains IDs in \p domainArray for a device. The
612
+ * size of the \p domainArray buffer is given by \p
613
+ * *arraySizeBytes. The size of the \p domainArray buffer must be at
614
+ * least \p numDomains * sizeof(CUpti_EventDomainID) or else all
615
+ * domains will not be returned. The value returned in \p
616
+ * *arraySizeBytes contains the number of bytes returned in \p
617
+ * domainArray.
618
+ * \note \b Thread-safety: this function is thread safe.
619
+ *
620
+ * \param device The CUDA device
621
+ * \param arraySizeBytes The size of \p domainArray in bytes, and
622
+ * returns the number of bytes written to \p domainArray
623
+ * \param domainArray Returns the IDs of the event domains for the device
624
+ *
625
+ * \retval CUPTI_SUCCESS
626
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
627
+ * \retval CUPTI_ERROR_INVALID_DEVICE
628
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
629
+ * \p domainArray are NULL
630
+ */
631
+ CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains(CUdevice device,
632
+ size_t *arraySizeBytes,
633
+ CUpti_EventDomainID *domainArray);
634
+
635
+ /**
636
+ * \brief Read an event domain attribute.
637
+ *
638
+ * Returns an event domain attribute in \p *value. The size of the \p
639
+ * value buffer is given by \p *valueSize. The value returned in \p
640
+ * *valueSize contains the number of bytes returned in \p value.
641
+ *
642
+ * If the attribute value is a c-string that is longer than \p
643
+ * *valueSize, then only the first \p *valueSize characters will be
644
+ * returned and there will be no terminating null byte.
645
+ * \note \b Thread-safety: this function is thread safe.
646
+ *
647
+ * \param device The CUDA device
648
+ * \param eventDomain ID of the event domain
649
+ * \param attrib The event domain attribute to read
650
+ * \param valueSize The size of the \p value buffer in bytes, and
651
+ * returns the number of bytes written to \p value
652
+ * \param value Returns the attribute's value
653
+ *
654
+ * \retval CUPTI_SUCCESS
655
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
656
+ * \retval CUPTI_ERROR_INVALID_DEVICE
657
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
658
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
659
+ * is NULL, or if \p attrib is not an event domain attribute
660
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
661
+ * attribute values, indicates that the \p value buffer is too small
662
+ * to hold the attribute value.
663
+ */
664
+ CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute(CUdevice device,
665
+ CUpti_EventDomainID eventDomain,
666
+ CUpti_EventDomainAttribute attrib,
667
+ size_t *valueSize,
668
+ void *value);
669
+
670
+ /**
671
+ * \brief Get the number of event domains available on any device.
672
+ *
673
+ * Returns the total number of event domains available on any
674
+ * CUDA-capable device.
675
+ * \note \b Thread-safety: this function is thread safe.
676
+ *
677
+ * \param numDomains Returns the number of domains
678
+ *
679
+ * \retval CUPTI_SUCCESS
680
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
681
+ */
682
+ CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t *numDomains);
683
+
684
+ /**
685
+ * \brief Get the event domains available on any device.
686
+ *
687
+ * Returns all the event domains available on any CUDA-capable device.
688
+ * Event domain IDs are returned in \p domainArray. The size of the \p
689
+ * domainArray buffer is given by \p *arraySizeBytes. The size of the
690
+ * \p domainArray buffer must be at least \p numDomains *
691
+ * sizeof(CUpti_EventDomainID) or all domains will not be
692
+ * returned. The value returned in \p *arraySizeBytes contains the
693
+ * number of bytes returned in \p domainArray.
694
+ * \note \b Thread-safety: this function is thread safe.
695
+ *
696
+ * \param arraySizeBytes The size of \p domainArray in bytes, and
697
+ * returns the number of bytes written to \p domainArray
698
+ * \param domainArray Returns all the event domains
699
+ *
700
+ * \retval CUPTI_SUCCESS
701
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
702
+ * \p domainArray are NULL
703
+ */
704
+ CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes,
705
+ CUpti_EventDomainID *domainArray);
706
+
707
+ /**
708
+ * \brief Read an event domain attribute.
709
+ *
710
+ * Returns an event domain attribute in \p *value. The size of the \p
711
+ * value buffer is given by \p *valueSize. The value returned in \p
712
+ * *valueSize contains the number of bytes returned in \p value.
713
+ *
714
+ * If the attribute value is a c-string that is longer than \p
715
+ * *valueSize, then only the first \p *valueSize characters will be
716
+ * returned and there will be no terminating null byte.
717
+ * \note \b Thread-safety: this function is thread safe.
718
+ *
719
+ * \param eventDomain ID of the event domain
720
+ * \param attrib The event domain attribute to read
721
+ * \param valueSize The size of the \p value buffer in bytes, and
722
+ * returns the number of bytes written to \p value
723
+ * \param value Returns the attribute's value
724
+ *
725
+ * \retval CUPTI_SUCCESS
726
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
727
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
728
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
729
+ * is NULL, or if \p attrib is not an event domain attribute
730
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
731
+ * attribute values, indicates that the \p value buffer is too small
732
+ * to hold the attribute value.
733
+ */
734
+ CUptiResult CUPTIAPI cuptiEventDomainGetAttribute(CUpti_EventDomainID eventDomain,
735
+ CUpti_EventDomainAttribute attrib,
736
+ size_t *valueSize,
737
+ void *value);
738
+
739
+ /**
740
+ * \brief Get number of events in a domain.
741
+ *
742
+ * Returns the number of events in \p numEvents for a domain.
743
+ * \note \b Thread-safety: this function is thread safe.
744
+ *
745
+ * \param eventDomain ID of the event domain
746
+ * \param numEvents Returns the number of events in the domain
747
+ *
748
+ * \retval CUPTI_SUCCESS
749
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
750
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
751
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
752
+ */
753
+ CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents(CUpti_EventDomainID eventDomain,
754
+ uint32_t *numEvents);
755
+
756
+ /**
757
+ * \brief Get the events in a domain.
758
+ *
759
+ * Returns the event IDs in \p eventArray for a domain. The size of
760
+ * the \p eventArray buffer is given by \p *arraySizeBytes. The size
761
+ * of the \p eventArray buffer must be at least \p numEvents *
762
+ * sizeof(CUpti_EventID) or else all events will not be returned. The
763
+ * value returned in \p *arraySizeBytes contains the number of bytes
764
+ * returned in \p eventArray.
765
+ * \note \b Thread-safety: this function is thread safe.
766
+ *
767
+ * \param eventDomain ID of the event domain
768
+ * \param arraySizeBytes The size of \p eventArray in bytes, and
769
+ * returns the number of bytes written to \p eventArray
770
+ * \param eventArray Returns the IDs of the events in the domain
771
+ *
772
+ * \retval CUPTI_SUCCESS
773
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
774
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
775
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or \p
776
+ * eventArray are NULL
777
+ */
778
+ CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain,
779
+ size_t *arraySizeBytes,
780
+ CUpti_EventID *eventArray);
781
+
782
+ /**
783
+ * \brief Get an event attribute.
784
+ *
785
+ * Returns an event attribute in \p *value. The size of the \p
786
+ * value buffer is given by \p *valueSize. The value returned in \p
787
+ * *valueSize contains the number of bytes returned in \p value.
788
+ *
789
+ * If the attribute value is a c-string that is longer than \p
790
+ * *valueSize, then only the first \p *valueSize characters will be
791
+ * returned and there will be no terminating null byte.
792
+ * \note \b Thread-safety: this function is thread safe.
793
+ *
794
+ * \param event ID of the event
795
+ * \param attrib The event attribute to read
796
+ * \param valueSize The size of the \p value buffer in bytes, and
797
+ * returns the number of bytes written to \p value
798
+ * \param value Returns the attribute's value
799
+ *
800
+ * \retval CUPTI_SUCCESS
801
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
802
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
803
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
804
+ * is NULL, or if \p attrib is not an event attribute
805
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
806
+ * attribute values, indicates that the \p value buffer is too small
807
+ * to hold the attribute value.
808
+ */
809
+ CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event,
810
+ CUpti_EventAttribute attrib,
811
+ size_t *valueSize,
812
+ void *value);
813
+
814
+ /**
815
+ * \brief Find an event by name.
816
+ *
817
+ * Find an event by name and return the event ID in \p *event.
818
+ * \note \b Thread-safety: this function is thread safe.
819
+ *
820
+ * \param device The CUDA device
821
+ * \param eventName The name of the event to find
822
+ * \param event Returns the ID of the found event or undefined if
823
+ * unable to find the event
824
+ *
825
+ * \retval CUPTI_SUCCESS
826
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
827
+ * \retval CUPTI_ERROR_INVALID_DEVICE
828
+ * \retval CUPTI_ERROR_INVALID_EVENT_NAME if unable to find an event
829
+ * with name \p eventName. In this case \p *event is undefined
830
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventName or \p event are NULL
831
+ */
832
+ CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device,
833
+ const char *eventName,
834
+ CUpti_EventID *event);
835
+
836
+ /**
837
+ * \brief Create a new event group for a context.
838
+ *
839
+ * Creates a new event group for \p context and returns the new group
840
+ * in \p *eventGroup.
841
+ * \note \p flags are reserved for future use and should be set to zero.
842
+ * \note \b Thread-safety: this function is thread safe.
843
+ *
844
+ * \param context The context for the event group
845
+ * \param eventGroup Returns the new event group
846
+ * \param flags Reserved - must be zero
847
+ *
848
+ * \retval CUPTI_SUCCESS
849
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
850
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
851
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY
852
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
853
+ */
854
+ CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context,
855
+ CUpti_EventGroup *eventGroup,
856
+ uint32_t flags);
857
+
858
+ /**
859
+ * \brief Destroy an event group.
860
+ *
861
+ * Destroy an \p eventGroup and free its resources. An event group
862
+ * cannot be destroyed if it is enabled.
863
+ * \note \b Thread-safety: this function is thread safe.
864
+ *
865
+ * \param eventGroup The event group to destroy
866
+ *
867
+ * \retval CUPTI_SUCCESS
868
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
869
+ * \retval CUPTI_ERROR_INVALID_OPERATION if the event group is enabled
870
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
871
+ */
872
+ CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup);
873
+
874
+ /**
875
+ * \brief Read an event group attribute.
876
+ *
877
+ * Read an event group attribute and return it in \p *value.
878
+ * \note \b Thread-safety: this function is thread safe but client
879
+ * must guard against simultaneous destruction or modification of \p
880
+ * eventGroup (for example, client must guard against simultaneous
881
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
882
+ * etc.), and must guard against simultaneous destruction of the
883
+ * context in which \p eventGroup was created (for example, client
884
+ * must guard against simultaneous calls to cudaDeviceReset,
885
+ * cuCtxDestroy, etc.).
886
+ *
887
+ * \param eventGroup The event group
888
+ * \param attrib The attribute to read
889
+ * \param valueSize Size of the buffer pointed to by \p value, and
890
+ * returns the number of bytes written to \p value
891
+ * \param value Returns the value of the attribute
892
+ *
893
+ * \retval CUPTI_SUCCESS
894
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
895
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
896
+ * is NULL, or if \p attrib is not an event group attribute
897
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
898
+ * attribute values, indicates that the \p value buffer is too small
899
+ * to hold the attribute value.
900
+ */
901
+ CUptiResult CUPTIAPI cuptiEventGroupGetAttribute(CUpti_EventGroup eventGroup,
902
+ CUpti_EventGroupAttribute attrib,
903
+ size_t *valueSize,
904
+ void *value);
905
+
906
+ /**
907
+ * \brief Write an event group attribute.
908
+ *
909
+ * Write an event group attribute.
910
+ * \note \b Thread-safety: this function is thread safe.
911
+ *
912
+ * \param eventGroup The event group
913
+ * \param attrib The attribute to write
914
+ * \param valueSize The size, in bytes, of the value
915
+ * \param value The attribute value to write
916
+ *
917
+ * \retval CUPTI_SUCCESS
918
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
919
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
920
+ * is NULL, or if \p attrib is not an event group attribute, or if
921
+ * \p attrib is not a writable attribute
922
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that
923
+ * the \p value buffer is too small to hold the attribute value.
924
+ */
925
+ CUptiResult CUPTIAPI cuptiEventGroupSetAttribute(CUpti_EventGroup eventGroup,
926
+ CUpti_EventGroupAttribute attrib,
927
+ size_t valueSize,
928
+ void *value);
929
+
930
+ /**
931
+ * \brief Add an event to an event group.
932
+ *
933
+ * Add an event to an event group. The event add can fail for a number of reasons:
934
+ * \li The event group is enabled
935
+ * \li The event does not belong to the same event domain as the
936
+ * events that are already in the event group
937
+ * \li Device limitations on the events that can belong to the same group
938
+ * \li The event group is full
939
+ *
940
+ * \note \b Thread-safety: this function is thread safe.
941
+ *
942
+ * \param eventGroup The event group
943
+ * \param event The event to add to the group
944
+ *
945
+ * \retval CUPTI_SUCCESS
946
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
947
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
948
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY
949
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
950
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if \p event belongs to a
951
+ * different event domain than the events already in \p eventGroup, or
952
+ * if a device limitation prevents \p event from being collected at
953
+ * the same time as the events already in \p eventGroup
954
+ * \retval CUPTI_ERROR_MAX_LIMIT_REACHED if \p eventGroup is full
955
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
956
+ */
957
+ CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup,
958
+ CUpti_EventID event);
959
+
960
+ /**
961
+ * \brief Remove an event from an event group.
962
+ *
963
+ * Remove \p event from an event group. The event cannot be
964
+ * removed if the event group is enabled.
965
+ * \note \b Thread-safety: this function is thread safe.
966
+ *
967
+ * \param eventGroup The event group
968
+ * \param event The event to remove from the group
969
+ *
970
+ * \retval CUPTI_SUCCESS
971
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
972
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
973
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
974
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
975
+ */
976
+ CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup,
977
+ CUpti_EventID event);
978
+
979
+ /**
980
+ * \brief Remove all events from an event group.
981
+ *
982
+ * Remove all events from an event group. Events cannot be removed if
983
+ * the event group is enabled.
984
+ * \note \b Thread-safety: this function is thread safe.
985
+ *
986
+ * \param eventGroup The event group
987
+ *
988
+ * \retval CUPTI_SUCCESS
989
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
990
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
991
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
992
+ */
993
+ CUptiResult CUPTIAPI cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup);
994
+
995
+ /**
996
+ * \brief Zero all the event counts in an event group.
997
+ *
998
+ * Zero all the event counts in an event group.
999
+ * \note \b Thread-safety: this function is thread safe but client
1000
+ * must guard against simultaneous destruction or modification of \p
1001
+ * eventGroup (for example, client must guard against simultaneous
1002
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
1003
+ * etc.), and must guard against simultaneous destruction of the
1004
+ * context in which \p eventGroup was created (for example, client
1005
+ * must guard against simultaneous calls to cudaDeviceReset,
1006
+ * cuCtxDestroy, etc.).
1007
+ *
1008
+ * \param eventGroup The event group
1009
+ *
1010
+ * \retval CUPTI_SUCCESS
1011
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1012
+ * \retval CUPTI_ERROR_HARDWARE
1013
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1014
+ */
1015
+ CUptiResult CUPTIAPI cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup);
1016
+
1017
+ /**
1018
+ * \brief Enable an event group.
1019
+ *
1020
+ * Enable an event group. Enabling an event group zeros the value of
1021
+ * all the events in the group and then starts collection of those
1022
+ * events.
1023
+ * \note \b Thread-safety: this function is thread safe.
1024
+ *
1025
+ * \param eventGroup The event group
1026
+ *
1027
+ * \retval CUPTI_SUCCESS
1028
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1029
+ * \retval CUPTI_ERROR_HARDWARE
1030
+ * \retval CUPTI_ERROR_NOT_READY if \p eventGroup does not contain any events
1031
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if \p eventGroup cannot be
1032
+ * enabled due to other already enabled event groups
1033
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1034
+ * \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling
1035
+ * and hardware is busy
1036
+ */
1037
+ CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup);
1038
+
1039
+ /**
1040
+ * \brief Disable an event group.
1041
+ *
1042
+ * Disable an event group. Disabling an event group stops collection
1043
+ * of events contained in the group.
1044
+ * \note \b Thread-safety: this function is thread safe.
1045
+ *
1046
+ * \param eventGroup The event group
1047
+ *
1048
+ * \retval CUPTI_SUCCESS
1049
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1050
+ * \retval CUPTI_ERROR_HARDWARE
1051
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1052
+ */
1053
+ CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup);
1054
+
1055
+ /**
1056
+ * \brief Read the value for an event in an event group.
1057
+ *
1058
+ * Read the value for an event in an event group. The event value is
1059
+ * returned in the \p eventValueBuffer buffer. \p
1060
+ * eventValueBufferSizeBytes indicates the size of the \p
1061
+ * eventValueBuffer buffer. The buffer must be at least sizeof(uint64_t)
1062
+ * if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set
1063
+ * on the group containing the event. The buffer must be at least
1064
+ * (sizeof(uint64_t) * number of domain instances) if
1065
+ * ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the
1066
+ * group.
1067
+ *
1068
+ * If any instance of an event counter overflows, the value returned
1069
+ * for that event instance will be ::CUPTI_EVENT_OVERFLOW.
1070
+ *
1071
+ * The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
1072
+ *
1073
+ * Reading an event from a disabled event group is not allowed. After
1074
+ * being read, an event's value is reset to zero.
1075
+ * \note \b Thread-safety: this function is thread safe but client
1076
+ * must guard against simultaneous destruction or modification of \p
1077
+ * eventGroup (for example, client must guard against simultaneous
1078
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
1079
+ * etc.), and must guard against simultaneous destruction of the
1080
+ * context in which \p eventGroup was created (for example, client
1081
+ * must guard against simultaneous calls to cudaDeviceReset,
1082
+ * cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
1083
+ * called simultaneously with this function, then returned event
1084
+ * values are undefined.
1085
+ *
1086
+ * \param eventGroup The event group
1087
+ * \param flags Flags controlling the reading mode
1088
+ * \param event The event to read
1089
+ * \param eventValueBufferSizeBytes The size of \p eventValueBuffer
1090
+ * in bytes, and returns the number of bytes written to \p
1091
+ * eventValueBuffer
1092
+ * \param eventValueBuffer Returns the event value(s)
1093
+ *
1094
+ * \retval CUPTI_SUCCESS
1095
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1096
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
1097
+ * \retval CUPTI_ERROR_HARDWARE
1098
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
1099
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
1100
+ * eventValueBufferSizeBytes or \p eventValueBuffer is NULL
1101
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
1102
+ * is not sufficient
1103
+ */
1104
+ CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup,
1105
+ CUpti_ReadEventFlags flags,
1106
+ CUpti_EventID event,
1107
+ size_t *eventValueBufferSizeBytes,
1108
+ uint64_t *eventValueBuffer);
1109
+
1110
+ /**
1111
+ * \brief Read the values for all the events in an event group.
1112
+ *
1113
+ * Read the values for all the events in an event group. The event
1114
+ * values are returned in the \p eventValueBuffer buffer. \p
1115
+ * eventValueBufferSizeBytes indicates the size of \p
1116
+ * eventValueBuffer. The buffer must be at least (sizeof(uint64_t) *
1117
+ * number of events in group) if
1118
+ * ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on
1119
+ * the group containing the events. The buffer must be at least
1120
+ * (sizeof(uint64_t) * number of domain instances * number of events in
1121
+ * group) if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is
1122
+ * set on the group.
1123
+ *
1124
+ * The data format returned in \p eventValueBuffer is:
1125
+ * - domain instance 0: event0 event1 ... eventN
1126
+ * - domain instance 1: event0 event1 ... eventN
1127
+ * - ...
1128
+ * - domain instance M: event0 event1 ... eventN
1129
+ *
1130
+ * The event order in \p eventValueBuffer is returned in \p
1131
+ * eventIdArray. The size of \p eventIdArray is specified in \p
1132
+ * eventIdArraySizeBytes. The size should be at least
1133
+ * (sizeof(CUpti_EventID) * number of events in group).
1134
+ *
1135
+ * If any instance of any event counter overflows, the value returned
1136
+ * for that event instance will be ::CUPTI_EVENT_OVERFLOW.
1137
+ *
1138
+ * The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
1139
+ *
1140
+ * Reading events from a disabled event group is not allowed. After
1141
+ * being read, an event's value is reset to zero.
1142
+ * \note \b Thread-safety: this function is thread safe but client
1143
+ * must guard against simultaneous destruction or modification of \p
1144
+ * eventGroup (for example, client must guard against simultaneous
1145
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
1146
+ * etc.), and must guard against simultaneous destruction of the
1147
+ * context in which \p eventGroup was created (for example, client
1148
+ * must guard against simultaneous calls to cudaDeviceReset,
1149
+ * cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
1150
+ * called simultaneously with this function, then returned event
1151
+ * values are undefined.
1152
+ *
1153
+ * \param eventGroup The event group
1154
+ * \param flags Flags controlling the reading mode
1155
+ * \param eventValueBufferSizeBytes The size of \p eventValueBuffer in
1156
+ * bytes, and returns the number of bytes written to \p
1157
+ * eventValueBuffer
1158
+ * \param eventValueBuffer Returns the event values
1159
+ * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
1160
+ * and returns the number of bytes written to \p eventIdArray
1161
+ * \param eventIdArray Returns the IDs of the events in the same order
1162
+ * as the values returned in \p eventValueBuffer.
1163
+ * \param numEventIdsRead Returns the number of event IDs returned
1164
+ * in \p eventIdArray
1165
+ *
1166
+ * \retval CUPTI_SUCCESS
1167
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1168
+ * \retval CUPTI_ERROR_HARDWARE
1169
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
1170
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
1171
+ * eventValueBufferSizeBytes, \p eventValueBuffer, \p
1172
+ * eventIdArraySizeBytes, \p eventIdArray or \p numEventIdsRead is
1173
+ * NULL
1174
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
1175
+ * or \p eventIdArray is not sufficient
1176
+ */
1177
+ CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents(CUpti_EventGroup eventGroup,
1178
+ CUpti_ReadEventFlags flags,
1179
+ size_t *eventValueBufferSizeBytes,
1180
+ uint64_t *eventValueBuffer,
1181
+ size_t *eventIdArraySizeBytes,
1182
+ CUpti_EventID *eventIdArray,
1183
+ size_t *numEventIdsRead);
1184
+
1185
+ /**
1186
+ * \brief For a set of events, get the grouping that indicates the
1187
+ * number of passes and the event groups necessary to collect the
1188
+ * events.
1189
+ *
1190
+ * The number of events that can be collected simultaneously varies by
1191
+ * device and by the type of the events. When events can be collected
1192
+ * simultaneously, they may need to be grouped into multiple event
1193
+ * groups because they are from different event domains. This function
1194
+ * takes a set of events and determines how many passes are required
1195
+ * to collect all those events, and which events can be collected
1196
+ * simultaneously in each pass.
1197
+ *
1198
+ * The CUpti_EventGroupSets returned in \p eventGroupPasses indicates
1199
+ * how many passes are required to collect the events with the \p
1200
+ * numSets field. Within each event group set, the \p sets array
1201
+ * indicates the event groups that should be collected on each pass.
1202
+ * \note \b Thread-safety: this function is thread safe, but client
1203
+ * must guard against another thread simultaneously destroying \p
1204
+ * context.
1205
+ *
1206
+ * \param context The context for event collection
1207
+ * \param eventIdArraySizeBytes Size of \p eventIdArray in bytes
1208
+ * \param eventIdArray Array of event IDs that need to be grouped
1209
+ * \param eventGroupPasses Returns a CUpti_EventGroupSets object that
1210
+ * indicates the number of passes required to collect the events and
1211
+ * the events to collect on each pass
1212
+ *
1213
+ * \retval CUPTI_SUCCESS
1214
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1215
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
1216
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
1217
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArray or
1218
+ * \p eventGroupPasses is NULL
1219
+ */
1220
+ CUptiResult CUPTIAPI cuptiEventGroupSetsCreate(CUcontext context,
1221
+ size_t eventIdArraySizeBytes,
1222
+ CUpti_EventID *eventIdArray,
1223
+ CUpti_EventGroupSets **eventGroupPasses);
1224
+
1225
+ /**
1226
+ * \brief Destroy an event group sets object.
1227
+ *
1228
+ * Destroy a CUpti_EventGroupSets object.
1229
+ * \note \b Thread-safety: this function is thread safe.
1230
+ *
1231
+ * \param eventGroupSets The object to destroy
1232
+ *
1233
+ * \retval CUPTI_SUCCESS
1234
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1235
+ * \retval CUPTI_ERROR_INVALID_OPERATION if any of the event groups
1236
+ * contained in the sets is enabled
1237
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSets is NULL
1238
+ */
1239
+ CUptiResult CUPTIAPI cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets);
1240
+
1241
+
1242
+ /**
1243
+ * \brief Enable an event group set.
1244
+ *
1245
+ * Enable a set of event groups. Enabling a set of event groups zeros the value of
1246
+ * all the events in all the groups and then starts collection of those events.
1247
+ * \note \b Thread-safety: this function is thread safe.
1248
+ *
1249
+ * \param eventGroupSet The pointer to the event group set
1250
+ *
1251
+ * \retval CUPTI_SUCCESS
1252
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1253
+ * \retval CUPTI_ERROR_HARDWARE
1254
+ * \retval CUPTI_ERROR_NOT_READY if an event group in \p eventGroupSet does not contain any events
1255
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if an event group in \p eventGroupSet cannot be
1256
+ * enabled due to other already enabled event groups
1257
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
1258
+ * \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling and hardware is
1259
+ * busy
1260
+ */
1261
+ CUptiResult CUPTIAPI cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet);
1262
+
1263
+ /**
1264
+ * \brief Disable an event group set.
1265
+ *
1266
+ * Disable a set of event groups. Disabling a set of event groups
1267
+ * stops collection of events contained in the groups.
1268
+ * \note \b Thread-safety: this function is thread safe.
1269
+ * \note If this call fails, some of the event groups in the set may be disabled
1270
+ * and other event groups may remain enabled.
1271
+ *
1272
+ * \param eventGroupSet The pointer to the event group set
1273
+ * \retval CUPTI_SUCCESS
1274
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1275
+ * \retval CUPTI_ERROR_HARDWARE
1276
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
1277
+ */
1278
+ CUptiResult CUPTIAPI cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet);
1279
+
1280
+ /**
1281
+ * \brief Enable kernel replay mode.
1282
+ *
1283
+ * Set profiling mode for the context to replay mode. In this mode,
1284
+ * any number of events can be collected in one run of the kernel. The
1285
+ * event collection mode will automatically switch to
1286
+ * CUPTI_EVENT_COLLECTION_MODE_KERNEL. In this mode, \ref
1287
+ * cuptiSetEventCollectionMode will return
1288
+ * CUPTI_ERROR_INVALID_OPERATION.
1289
+ * \note Kernels might take longer to run if many events are enabled.
1290
+ * \note \b Thread-safety: this function is thread safe.
1291
+ *
1292
+ * \param context The context
1293
+ * \retval CUPTI_SUCCESS
1294
+ */
1295
+ CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context);
1296
+
1297
+ /**
1298
+ * \brief Disable kernel replay mode.
1299
+ *
1300
+ * Set profiling mode for the context to non-replay (default)
1301
+ * mode. Event collection mode will be set to
1302
+ * CUPTI_EVENT_COLLECTION_MODE_KERNEL. All previously enabled
1303
+ * event groups and event group sets will be disabled.
1304
+ * \note \b Thread-safety: this function is thread safe.
1305
+ *
1306
+ * \param context The context
1307
+ * \retval CUPTI_SUCCESS
1308
+ */
1309
+ CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context);
1310
+
1311
+ /**
1312
+ * \brief Function type for getting updates on kernel replay.
1313
+ *
1314
+ * \param kernelName The mangled kernel name
1315
+ * \param numReplaysDone Number of replays done so far
1316
+ * \param customData Pointer to any custom data passed in when subscribing
1317
+ */
1318
+ typedef void (CUPTIAPI *CUpti_KernelReplayUpdateFunc)(
1319
+ const char *kernelName,
1320
+ int numReplaysDone,
1321
+ void *customData);
1322
+
1323
+ /**
1324
+ * \brief Subscribe to kernel replay updates.
1325
+ *
1326
+ * When subscribed, the function pointer passed in will be called each time a
1327
+ * kernel run is finished during kernel replay. Previously subscribed function
1328
+ * pointer will be replaced. Passing in NULL as the function pointer unsubscribes
1329
+ * the update.
1330
+ *
1331
+ * \param updateFunc The update function pointer
1332
+ * \param customData Pointer to any custom data
1333
+ * \retval CUPTI_SUCCESS
1334
+ */
1335
+ CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate(CUpti_KernelReplayUpdateFunc updateFunc, void *customData);
1336
+
1337
+ /** @} */ /* END CUPTI_EVENT_API */
1338
+
1339
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
1340
+ #pragma GCC visibility pop
1341
+ #endif
1342
+
1343
+ #if defined(__cplusplus)
1344
+ }
1345
+ #endif
1346
+
1347
+ #endif /*_CUPTI_EVENTS_H_*/
1348
+
1349
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_metrics.h ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2011-2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_METRIC_H_)
51
+ #define _CUPTI_METRIC_H_
52
+
53
+ #include <cuda.h>
54
+ #include <string.h>
55
+ #include <cuda_stdint.h>
56
+ #include <cupti_result.h>
57
+
58
+ #ifndef CUPTIAPI
59
+ #ifdef _WIN32
60
+ #define CUPTIAPI __stdcall
61
+ #else
62
+ #define CUPTIAPI
63
+ #endif
64
+ #endif
65
+
66
+ #if defined(__cplusplus)
67
+ extern "C" {
68
+ #endif
69
+
70
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
71
+ #pragma GCC visibility push(default)
72
+ #endif
73
+
74
+ /**
75
+ * \defgroup CUPTI_METRIC_API CUPTI Metric API
76
+ * Functions, types, and enums that implement the CUPTI Metric API.
77
+ *
78
+ * \note The CUPTI metric API from the header cupti_metrics.h is not supported on devices
79
+ * with compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
80
+ * This API is deprecated in CUDA 12.8 release and will be removed in a future CUDA release.
81
+ * This is replaced by the host profiling API in the header cupti_profiler_host.h and
82
+ * target profiling API in the header cupti_range_profiler.h which are supported on
83
+ * devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures).
84
+ *
85
+ * @{
86
+ */
87
+
88
+ /**
89
+ * \brief ID for a metric.
90
+ *
91
+ * A metric provides a measure of some aspect of the device.
92
+ */
93
+ typedef uint32_t CUpti_MetricID;
94
+
95
+ /**
96
+ * \brief A metric category.
97
+ *
98
+ * Each metric is assigned to a category that represents the general
99
+ * type of the metric. A metric's category is accessed using \ref
100
+ * cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_CATEGORY
101
+ * attribute.
102
+ */
103
+ typedef enum {
104
+ /**
105
+ * A memory related metric.
106
+ */
107
+ CUPTI_METRIC_CATEGORY_MEMORY = 0,
108
+ /**
109
+ * An instruction related metric.
110
+ */
111
+ CUPTI_METRIC_CATEGORY_INSTRUCTION = 1,
112
+ /**
113
+ * A multiprocessor related metric.
114
+ */
115
+ CUPTI_METRIC_CATEGORY_MULTIPROCESSOR = 2,
116
+ /**
117
+ * A cache related metric.
118
+ */
119
+ CUPTI_METRIC_CATEGORY_CACHE = 3,
120
+ /**
121
+ * A texture related metric.
122
+ */
123
+ CUPTI_METRIC_CATEGORY_TEXTURE = 4,
124
+ /**
125
+ * An NVLink related metric.
126
+ */
127
+ CUPTI_METRIC_CATEGORY_NVLINK = 5,
128
+ /**
129
+ * A PCIe related metric.
130
+ */
131
+ CUPTI_METRIC_CATEGORY_PCIE = 6,
132
+ CUPTI_METRIC_CATEGORY_FORCE_INT = 0x7fffffff,
133
+ } CUpti_MetricCategory;
134
+
135
+ /**
136
+ * \brief A metric evaluation mode.
137
+ *
138
+ * A metric can be evaluated per hardware instance to know the load balancing
139
+ * across instances of a domain or the metric can be evaluated in aggregate mode
140
+ * when the events involved in metric evaluation are from different event
141
+ * domains. It might be possible to evaluate some metrics in both
142
+ * modes for convenience. A metric's evaluation mode is accessed using \ref
143
+ * cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_EVALUATION_MODE
144
+ * attribute.
145
+ */
146
+ typedef enum {
147
+ /**
148
+ * If this bit is set, the metric can be profiled for each instance of the
149
+ * domain. The event values passed to \ref cuptiMetricGetValue can contain
150
+ * values for one instance of the domain. And \ref cuptiMetricGetValue can
151
+ * be called for each instance.
152
+ */
153
+ CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE = 1,
154
+ /**
155
+ * If this bit is set, the metric can be profiled over all instances. The
156
+ * event values passed to \ref cuptiMetricGetValue can be aggregated values
157
+ * of events for all instances of the domain.
158
+ */
159
+ CUPTI_METRIC_EVALUATION_MODE_AGGREGATE = 1 << 1,
160
+ CUPTI_METRIC_EVALUATION_MODE_FORCE_INT = 0x7fffffff,
161
+ } CUpti_MetricEvaluationMode;
162
+
163
+ /**
164
+ * \brief Kinds of metric values.
165
+ *
166
+ * Metric values can be one of several different kinds. Corresponding
167
+ * to each kind is a member of the CUpti_MetricValue union. The metric
168
+ * value returned by \ref cuptiMetricGetValue should be accessed using
169
+ * the appropriate member of that union based on its value kind.
170
+ */
171
+ typedef enum {
172
+ /**
173
+ * The metric value is a 64-bit double.
174
+ */
175
+ CUPTI_METRIC_VALUE_KIND_DOUBLE = 0,
176
+ /**
177
+ * The metric value is a 64-bit unsigned integer.
178
+ */
179
+ CUPTI_METRIC_VALUE_KIND_UINT64 = 1,
180
+ /**
181
+ * The metric value is a percentage represented by a 64-bit
182
+ * double. For example, 57.5% is represented by the value 57.5.
183
+ */
184
+ CUPTI_METRIC_VALUE_KIND_PERCENT = 2,
185
+ /**
186
+ * The metric value is a throughput represented by a 64-bit
187
+ * unsigned integer. The unit for throughput values is bytes/second.
188
+ */
189
+ CUPTI_METRIC_VALUE_KIND_THROUGHPUT = 3,
190
+ /**
191
+ * The metric value is a 64-bit signed integer.
192
+ */
193
+ CUPTI_METRIC_VALUE_KIND_INT64 = 4,
194
+ /**
195
+ * The metric value is a utilization level, as represented by
196
+ * CUpti_MetricValueUtilizationLevel.
197
+ */
198
+ CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL = 5,
199
+
200
+ CUPTI_METRIC_VALUE_KIND_FORCE_INT = 0x7fffffff
201
+ } CUpti_MetricValueKind;
202
+
203
+ /**
204
+ * \brief Enumeration of utilization levels for metrics values of kind
205
+ * CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL. Utilization values can
206
+ * vary from IDLE (0) to MAX (10) but the enumeration only provides
207
+ * specific names for a few values.
208
+ */
209
+ typedef enum {
210
+ CUPTI_METRIC_VALUE_UTILIZATION_IDLE = 0,
211
+ CUPTI_METRIC_VALUE_UTILIZATION_LOW = 2,
212
+ CUPTI_METRIC_VALUE_UTILIZATION_MID = 5,
213
+ CUPTI_METRIC_VALUE_UTILIZATION_HIGH = 8,
214
+ CUPTI_METRIC_VALUE_UTILIZATION_MAX = 10,
215
+ CUPTI_METRIC_VALUE_UTILIZATION_FORCE_INT = 0x7fffffff
216
+ } CUpti_MetricValueUtilizationLevel;
217
+
218
+ /**
219
+ * \brief Metric attributes.
220
+ *
221
+ * Metric attributes describe properties of a metric. These attributes
222
+ * can be read using \ref cuptiMetricGetAttribute.
223
+ */
224
+ typedef enum {
225
+ /**
226
+ * Metric name. Value is a null terminated const c-string.
227
+ */
228
+ CUPTI_METRIC_ATTR_NAME = 0,
229
+ /**
230
+ * Short description of metric. Value is a null terminated const c-string.
231
+ */
232
+ CUPTI_METRIC_ATTR_SHORT_DESCRIPTION = 1,
233
+ /**
234
+ * Long description of metric. Value is a null terminated const c-string.
235
+ */
236
+ CUPTI_METRIC_ATTR_LONG_DESCRIPTION = 2,
237
+ /**
238
+ * Category of the metric. Value is of type CUpti_MetricCategory.
239
+ */
240
+ CUPTI_METRIC_ATTR_CATEGORY = 3,
241
+ /**
242
+ * Value type of the metric. Value is of type CUpti_MetricValueKind.
243
+ */
244
+ CUPTI_METRIC_ATTR_VALUE_KIND = 4,
245
+ /**
246
+ * Metric evaluation mode. Value is of type CUpti_MetricEvaluationMode.
247
+ */
248
+ CUPTI_METRIC_ATTR_EVALUATION_MODE = 5,
249
+ CUPTI_METRIC_ATTR_FORCE_INT = 0x7fffffff,
250
+ } CUpti_MetricAttribute;
251
+
252
+ /**
253
+ * \brief A metric value.
254
+ *
255
+ * Metric values can be one of several different kinds. Corresponding
256
+ * to each kind is a member of the CUpti_MetricValue union. The metric
257
+ * value returned by \ref cuptiMetricGetValue should be accessed using
258
+ * the appropriate member of that union based on its value kind.
259
+ */
260
+ typedef union {
261
+ /*
262
+ * Value for CUPTI_METRIC_VALUE_KIND_DOUBLE.
263
+ */
264
+ double metricValueDouble;
265
+ /*
266
+ * Value for CUPTI_METRIC_VALUE_KIND_UINT64.
267
+ */
268
+ uint64_t metricValueUint64;
269
+ /*
270
+ * Value for CUPTI_METRIC_VALUE_KIND_INT64.
271
+ */
272
+ int64_t metricValueInt64;
273
+ /*
274
+ * Value for CUPTI_METRIC_VALUE_KIND_PERCENT. For example, 57.5% is
275
+ * represented by the value 57.5.
276
+ */
277
+ double metricValuePercent;
278
+ /*
279
+ * Value for CUPTI_METRIC_VALUE_KIND_THROUGHPUT. The unit for
280
+ * throughput values is bytes/second.
281
+ */
282
+ uint64_t metricValueThroughput;
283
+ /*
284
+ * Value for CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL.
285
+ */
286
+ CUpti_MetricValueUtilizationLevel metricValueUtilizationLevel;
287
+ } CUpti_MetricValue;
288
+
289
+ /**
290
+ * \brief Device class.
291
+ *
292
+ * Enumeration of device classes for metric property
293
+ * CUPTI_METRIC_PROPERTY_DEVICE_CLASS.
294
+ */
295
+ typedef enum {
296
+ CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TESLA = 0,
297
+ CUPTI_METRIC_PROPERTY_DEVICE_CLASS_QUADRO = 1,
298
+ CUPTI_METRIC_PROPERTY_DEVICE_CLASS_GEFORCE = 2,
299
+ CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TEGRA = 3,
300
+ } CUpti_MetricPropertyDeviceClass;
301
+
302
+ /**
303
+ * \brief Metric device properties.
304
+ *
305
+ * Metric device properties describe device properties which are needed for a metric.
306
+ * Some of these properties can be collected using cuDeviceGetAttribute.
307
+ */
308
+ typedef enum {
309
+ /*
310
+ * Number of multiprocessors on a device. This can be collected
311
+ * using value of \param CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT of
312
+ * cuDeviceGetAttribute.
313
+ */
314
+ CUPTI_METRIC_PROPERTY_MULTIPROCESSOR_COUNT,
315
+ /*
316
+ * Maximum number of warps on a multiprocessor. This can be
317
+ * collected using ratio of value of \param
318
+ * CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR and \param
319
+ * CU_DEVICE_ATTRIBUTE_WARP_SIZE of cuDeviceGetAttribute.
320
+ */
321
+ CUPTI_METRIC_PROPERTY_WARPS_PER_MULTIPROCESSOR,
322
+ /*
323
+ * GPU Time for kernel in ns. This should be profiled using CUPTI
324
+ * Activity API.
325
+ */
326
+ CUPTI_METRIC_PROPERTY_KERNEL_GPU_TIME,
327
+ /*
328
+ * Clock rate for device in KHz. This should be collected using
329
+ * value of \param CU_DEVICE_ATTRIBUTE_CLOCK_RATE of
330
+ * cuDeviceGetAttribute.
331
+ */
332
+ CUPTI_METRIC_PROPERTY_CLOCK_RATE,
333
+ /*
334
+ * Number of Frame buffer units for device. This should be collected
335
+ * using value of \param CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS of
336
+ * cuptiDeviceGetAttribute.
337
+ */
338
+ CUPTI_METRIC_PROPERTY_FRAME_BUFFER_COUNT,
339
+ /*
340
+ * Global memory bandwidth in KBytes/sec. This should be collected
341
+ * using value of \param CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH
342
+ * of cuptiDeviceGetAttribute.
343
+ */
344
+ CUPTI_METRIC_PROPERTY_GLOBAL_MEMORY_BANDWIDTH,
345
+ /*
346
+ * PCIE link rate in Mega bits/sec. This should be collected using
347
+ * value of \param CUPTI_DEVICE_ATTR_PCIE_LINK_RATE of
348
+ * cuptiDeviceGetAttribute.
349
+ */
350
+ CUPTI_METRIC_PROPERTY_PCIE_LINK_RATE,
351
+ /*
352
+ * PCIE link width for device. This should be collected using
353
+ * value of \param CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH of
354
+ * cuptiDeviceGetAttribute.
355
+ */
356
+ CUPTI_METRIC_PROPERTY_PCIE_LINK_WIDTH,
357
+ /*
358
+ * PCIE generation for device. This should be collected using
359
+ * value of \param CUPTI_DEVICE_ATTR_PCIE_GEN of
360
+ * cuptiDeviceGetAttribute.
361
+ */
362
+ CUPTI_METRIC_PROPERTY_PCIE_GEN,
363
+ /*
364
+ * The device class. This should be collected using
365
+ * value of \param CUPTI_DEVICE_ATTR_DEVICE_CLASS of
366
+ * cuptiDeviceGetAttribute.
367
+ */
368
+ CUPTI_METRIC_PROPERTY_DEVICE_CLASS,
369
+ /*
370
+ * Peak single precision floating point operations that
371
+ * can be performed in one cycle by the device.
372
+ * This should be collected using value of
373
+ * \param CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE of
374
+ * cuptiDeviceGetAttribute.
375
+ */
376
+ CUPTI_METRIC_PROPERTY_FLOP_SP_PER_CYCLE,
377
+ /*
378
+ * Peak double precision floating point operations that
379
+ * can be performed in one cycle by the device.
380
+ * This should be collected using value of
381
+ * \param CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE of
382
+ * cuptiDeviceGetAttribute.
383
+ */
384
+ CUPTI_METRIC_PROPERTY_FLOP_DP_PER_CYCLE,
385
+ /*
386
+ * Number of L2 units on a device. This can be collected
387
+ * using value of \param CUPTI_DEVICE_ATTR_MAX_L2_UNITS of
388
+ * cuptiDeviceGetAttribute.
389
+ */
390
+ CUPTI_METRIC_PROPERTY_L2_UNITS,
391
+ /*
392
+ * Whether ECC support is enabled on the device. This can be
393
+ * collected using value of \param CU_DEVICE_ATTRIBUTE_ECC_ENABLED of
394
+ * cuDeviceGetAttribute.
395
+ */
396
+ CUPTI_METRIC_PROPERTY_ECC_ENABLED,
397
+ /*
398
+ * Peak half precision floating point operations that
399
+ * can be performed in one cycle by the device.
400
+ * This should be collected using value of
401
+ * \param CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE of
402
+ * cuptiDeviceGetAttribute.
403
+ */
404
+ CUPTI_METRIC_PROPERTY_FLOP_HP_PER_CYCLE,
405
+ /*
406
+ * NVLINK Bandwidth for device. This should be collected
407
+ * using value of \param CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW of
408
+ * cuptiDeviceGetAttribute.
409
+ */
410
+ CUPTI_METRIC_PROPERTY_GPU_CPU_NVLINK_BANDWIDTH,
411
+ } CUpti_MetricPropertyID;
412
+
413
+ /**
414
+ * \brief Get the total number of metrics available on any device.
415
+ *
416
+ * Returns the total number of metrics available on any CUDA-capable
417
+ * devices.
418
+ *
419
+ * \param numMetrics Returns the number of metrics
420
+ *
421
+ * \retval CUPTI_SUCCESS
422
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numMetrics is NULL
423
+ */
424
+ CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics);
425
+
426
+ /**
427
+ * \brief Get all the metrics available on any device.
428
+ *
429
+ * Returns the metric IDs in \p metricArray for all CUDA-capable
430
+ * devices. The size of the \p metricArray buffer is given by \p
431
+ * *arraySizeBytes. The size of the \p metricArray buffer must be at
432
+ * least \p numMetrics * sizeof(CUpti_MetricID) or all metric IDs will
433
+ * not be returned. The value returned in \p *arraySizeBytes contains
434
+ * the number of bytes returned in \p metricArray.
435
+ *
436
+ * \param arraySizeBytes The size of \p metricArray in bytes, and
437
+ * returns the number of bytes written to \p metricArray
438
+ * \param metricArray Returns the IDs of the metrics
439
+ *
440
+ * \retval CUPTI_SUCCESS
441
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
442
+ * \p metricArray are NULL
443
+ */
444
+ CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes,
445
+ CUpti_MetricID *metricArray);
446
+
447
+ /**
448
+ * \brief Get the number of metrics for a device.
449
+ *
450
+ * Returns the number of metrics available for a device.
451
+ *
452
+ * \param device The CUDA device
453
+ * \param numMetrics Returns the number of metrics available for the
454
+ * device
455
+ *
456
+ * \retval CUPTI_SUCCESS
457
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
458
+ * \retval CUPTI_ERROR_INVALID_DEVICE
459
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numMetrics is NULL
460
+ */
461
+ CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device,
462
+ uint32_t *numMetrics);
463
+
464
+ /**
465
+ * \brief Get the metrics for a device.
466
+ *
467
+ * Returns the metric IDs in \p metricArray for a device. The size of
468
+ * the \p metricArray buffer is given by \p *arraySizeBytes. The size
469
+ * of the \p metricArray buffer must be at least \p numMetrics *
470
+ * sizeof(CUpti_MetricID) or else all metric IDs will not be
471
+ * returned. The value returned in \p *arraySizeBytes contains the
472
+ * number of bytes returned in \p metricArray.
473
+ *
474
+ * \param device The CUDA device
475
+ * \param arraySizeBytes The size of \p metricArray in bytes, and
476
+ * returns the number of bytes written to \p metricArray
477
+ * \param metricArray Returns the IDs of the metrics for the device
478
+ *
479
+ * \retval CUPTI_SUCCESS
480
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
481
+ * \retval CUPTI_ERROR_INVALID_DEVICE
482
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
483
+ * \p metricArray are NULL
484
+ */
485
+ CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device,
486
+ size_t *arraySizeBytes,
487
+ CUpti_MetricID *metricArray);
488
+
489
+ /**
490
+ * \brief Get a metric attribute.
491
+ *
492
+ * Returns a metric attribute in \p *value. The size of the \p
493
+ * value buffer is given by \p *valueSize. The value returned in \p
494
+ * *valueSize contains the number of bytes returned in \p value.
495
+ *
496
+ * If the attribute value is a c-string that is longer than \p
497
+ * *valueSize, then only the first \p *valueSize characters will be
498
+ * returned and there will be no terminating null byte.
499
+ *
500
+ * \param metric ID of the metric
501
+ * \param attrib The metric attribute to read
502
+ * \param valueSize The size of the \p value buffer in bytes, and
503
+ * returns the number of bytes written to \p value
504
+ * \param value Returns the attribute's value
505
+ *
506
+ * \retval CUPTI_SUCCESS
507
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
508
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
509
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
510
+ * is NULL, or if \p attrib is not a metric attribute
511
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
512
+ * attribute values, indicates that the \p value buffer is too small
513
+ * to hold the attribute value.
514
+ */
515
+ CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric,
516
+ CUpti_MetricAttribute attrib,
517
+ size_t *valueSize,
518
+ void *value);
519
+
520
+ /**
521
+ * \brief Find a metric by name.
522
+ *
523
+ * Find a metric by name and return the metric ID in \p *metric.
524
+ *
525
+ * \param device The CUDA device
526
+ * \param metricName The name of metric to find
527
+ * \param metric Returns the ID of the found metric or undefined if
528
+ * unable to find the metric
529
+ *
530
+ * \retval CUPTI_SUCCESS
531
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
532
+ * \retval CUPTI_ERROR_INVALID_DEVICE
533
+ * \retval CUPTI_ERROR_INVALID_METRIC_NAME if unable to find a metric
534
+ * with name \p metricName. In this case \p *metric is undefined
535
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricName or \p
536
+ * metric are NULL.
537
+ */
538
+ CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device,
539
+ const char *metricName,
540
+ CUpti_MetricID *metric);
541
+
542
+ /**
543
+ * \brief Get number of events required to calculate a metric.
544
+ *
545
+ * Returns the number of events in \p numEvents that are required to
546
+ * calculate a metric.
547
+ *
548
+ * \param metric ID of the metric
549
+ * \param numEvents Returns the number of events required for the metric
550
+ *
551
+ * \retval CUPTI_SUCCESS
552
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
553
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
554
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
555
+ */
556
+ CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric,
557
+ uint32_t *numEvents);
558
+
559
+ /**
560
+ * \brief Get the events required to calculate a metric.
561
+ *
562
+ * Gets the event IDs in \p eventIdArray required to calculate a \p
563
+ * metric. The size of the \p eventIdArray buffer is given by \p
564
+ * *eventIdArraySizeBytes and must be at least \p numEvents *
565
+ * sizeof(CUpti_EventID) or all events will not be returned. The value
566
+ * returned in \p *eventIdArraySizeBytes contains the number of bytes
567
+ * returned in \p eventIdArray.
568
+ *
569
+ * \param metric ID of the metric
570
+ * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
571
+ * and returns the number of bytes written to \p eventIdArray
572
+ * \param eventIdArray Returns the IDs of the events required to
573
+ * calculate \p metric
574
+ *
575
+ * \retval CUPTI_SUCCESS
576
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
577
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
578
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArraySizeBytes or \p
579
+ * eventIdArray are NULL.
580
+ */
581
+ CUptiResult CUPTIAPI cuptiMetricEnumEvents(CUpti_MetricID metric,
582
+ size_t *eventIdArraySizeBytes,
583
+ CUpti_EventID *eventIdArray);
584
+
585
+ /**
586
+ * \brief Get number of properties required to calculate a metric.
587
+ *
588
+ * Returns the number of properties in \p numProp that are required to
589
+ * calculate a metric.
590
+ *
591
+ * \param metric ID of the metric
592
+ * \param numProp Returns the number of properties required for the
593
+ * metric
594
+ *
595
+ * \retval CUPTI_SUCCESS
596
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
597
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
598
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numProp is NULL
599
+ */
600
+ CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric,
601
+ uint32_t *numProp);
602
+
603
+ /**
604
+ * \brief Get the properties required to calculate a metric.
605
+ *
606
+ * Gets the property IDs in \p propIdArray required to calculate a \p
607
+ * metric. The size of the \p propIdArray buffer is given by \p
608
+ * *propIdArraySizeBytes and must be at least \p numProp *
609
+ * sizeof(CUpti_MetricPropertyID) or all properties will not be
610
+ * returned. The value returned in \p *propIdArraySizeBytes contains
611
+ * the number of bytes returned in \p propIdArray.
612
+ *
613
+ * \param metric ID of the metric
614
+ * \param propIdArraySizeBytes The size of \p propIdArray in bytes,
615
+ * and returns the number of bytes written to \p propIdArray
616
+ * \param propIdArray Returns the IDs of the properties required to
617
+ * calculate \p metric
618
+ *
619
+ * \retval CUPTI_SUCCESS
620
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
621
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
622
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p propIdArraySizeBytes or \p
623
+ * propIdArray are NULL.
624
+ */
625
+ CUptiResult CUPTIAPI cuptiMetricEnumProperties(CUpti_MetricID metric,
626
+ size_t *propIdArraySizeBytes,
627
+ CUpti_MetricPropertyID *propIdArray);
628
+
629
+
630
+ /**
631
+ * \brief For a metric get the groups of events that must be collected
632
+ * in the same pass.
633
+ *
634
+ * For a metric get the groups of events that must be collected in the
635
+ * same pass to ensure that the metric is calculated correctly. If the
636
+ * events are not collected as specified then the metric value may be
637
+ * inaccurate.
638
+ *
639
+ * The function returns NULL if a metric does not have any required
640
+ * event group. In this case the events needed for the metric can be
641
+ * grouped in any manner for collection.
642
+ *
643
+ * \param context The context for event collection
644
+ * \param metric The metric ID
645
+ * \param eventGroupSets Returns a CUpti_EventGroupSets object that
646
+ * indicates the events that must be collected in the same pass to
647
+ * ensure the metric is calculated correctly. Returns NULL if no
648
+ * grouping is required for metric
649
+ * \retval CUPTI_SUCCESS
650
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
651
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
652
+ */
653
+ CUptiResult CUPTIAPI cuptiMetricGetRequiredEventGroupSets(CUcontext context,
654
+ CUpti_MetricID metric,
655
+ CUpti_EventGroupSets **eventGroupSets);
656
+
657
+ /**
658
+ * \brief For a set of metrics, get the grouping that indicates the
659
+ * number of passes and the event groups necessary to collect the
660
+ * events required for those metrics.
661
+ *
662
+ * For a set of metrics, get the grouping that indicates the number of
663
+ * passes and the event groups necessary to collect the events
664
+ * required for those metrics.
665
+ *
666
+ * \see cuptiEventGroupSetsCreate for details on event group set
667
+ * creation.
668
+ *
669
+ * \param context The context for event collection
670
+ * \param metricIdArraySizeBytes Size of the metricIdArray in bytes
671
+ * \param metricIdArray Array of metric IDs
672
+ * \param eventGroupPasses Returns a CUpti_EventGroupSets object that
673
+ * indicates the number of passes required to collect the events and
674
+ * the events to collect on each pass
675
+ *
676
+ * \retval CUPTI_SUCCESS
677
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
678
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
679
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
680
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricIdArray or
681
+ * \p eventGroupPasses is NULL
682
+ */
683
+ CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets(CUcontext context,
684
+ size_t metricIdArraySizeBytes,
685
+ CUpti_MetricID *metricIdArray,
686
+ CUpti_EventGroupSets **eventGroupPasses);
687
+
688
+ /**
689
+ * \brief Calculate the value for a metric.
690
+ *
691
+ * Use the events collected for a metric to calculate the metric
692
+ * value. Metric value evaluation depends on the evaluation mode
693
+ * \ref CUpti_MetricEvaluationMode that the metric supports.
694
+ * If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE,
695
+ * then it assumes that the input event value is for one domain instance.
696
+ * If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_AGGREGATE,
697
+ * it assumes that input event values are
698
+ * normalized to represent all domain instances on a device. For the
699
+ * most accurate metric collection, the events required for the metric
700
+ * should be collected for all profiled domain instances. For example,
701
+ * to collect all instances of an event, set the
702
+ * CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on
703
+ * the group containing the event to 1. The normalized value for the
704
+ * event is then: (\p sum_event_values * \p totalInstanceCount) / \p
705
+ * instanceCount, where \p sum_event_values is the summation of the
706
+ * event values across all profiled domain instances, \p
707
+ * totalInstanceCount is obtained from querying
708
+ * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and \p instanceCount
709
+ * is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or
710
+ * CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT).
711
+ *
712
+ * \param device The CUDA device that the metric is being calculated for
713
+ * \param metric The metric ID
714
+ * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes
715
+ * \param eventIdArray The event IDs required to calculate \p metric
716
+ * \param eventValueArraySizeBytes The size of \p eventValueArray in bytes
717
+ * \param eventValueArray The normalized event values required to
718
+ * calculate \p metric. The values must be ordered to match the order of
719
+ * events in \p eventIdArray
720
+ * \param timeDuration The duration over which the events were
721
+ * collected, in ns
722
+ * \param metricValue Returns the value for the metric
723
+ *
724
+ * \retval CUPTI_SUCCESS
725
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
726
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
727
+ * \retval CUPTI_ERROR_INVALID_OPERATION
728
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the
729
+ * eventIdArray does not contain all the events needed for metric
730
+ * \retval CUPTI_ERROR_INVALID_EVENT_VALUE if any of the
731
+ * event values required for the metric is CUPTI_EVENT_OVERFLOW
732
+ * \retval CUPTI_ERROR_INVALID_METRIC_VALUE if the computed metric value
733
+ * cannot be represented in the metric's value type. For example,
734
+ * if the metric value type is unsigned and the computed metric value is negative
735
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricValue,
736
+ * \p eventIdArray or \p eventValueArray is NULL
737
+ */
738
+ CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device,
739
+ CUpti_MetricID metric,
740
+ size_t eventIdArraySizeBytes,
741
+ CUpti_EventID *eventIdArray,
742
+ size_t eventValueArraySizeBytes,
743
+ uint64_t *eventValueArray,
744
+ uint64_t timeDuration,
745
+ CUpti_MetricValue *metricValue);
746
+
747
+ /**
748
+ * \brief Calculate the value for a metric.
749
+ *
750
+ * Use the events and properties collected for a metric to calculate
751
+ * the metric value. Metric value evaluation depends on the evaluation
752
+ * mode \ref CUpti_MetricEvaluationMode that the metric supports. If
753
+ * a metric has evaluation mode as
754
+ * CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that the
755
+ * input event value is for one domain instance. If a metric has
756
+ * evaluation mode as CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, it
757
+ * assumes that input event values are normalized to represent all
758
+ * domain instances on a device. For the most accurate metric
759
+ * collection, the events required for the metric should be collected
760
+ * for all profiled domain instances. For example, to collect all
761
+ * instances of an event, set the
762
+ * CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on
763
+ * the group containing the event to 1. The normalized value for the
764
+ * event is then: (\p sum_event_values * \p totalInstanceCount) / \p
765
+ * instanceCount, where \p sum_event_values is the summation of the
766
+ * event values across all profiled domain instances, \p
767
+ * totalInstanceCount is obtained from querying
768
+ * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and \p instanceCount
769
+ * is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or
770
+ * CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT).
771
+ *
772
+ * \param metric The metric ID
773
+ * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes
774
+ * \param eventIdArray The event IDs required to calculate \p metric
775
+ * \param eventValueArraySizeBytes The size of \p eventValueArray in bytes
776
+ * \param eventValueArray The normalized event values required to
777
+ * calculate \p metric. The values must be ordered to match the order of
778
+ * events in \p eventIdArray
779
+ * \param propIdArraySizeBytes The size of \p propIdArray in bytes
780
+ * \param propIdArray The metric property IDs required to calculate \p metric
781
+ * \param propValueArraySizeBytes The size of \p propValueArray in bytes
782
+ * \param propValueArray The metric property values required to
783
+ * calculate \p metric. The values must be ordered to match the order of
784
+ * metric properties in \p propIdArray
785
+ * \param metricValue Returns the value for the metric
786
+ *
787
+ * \retval CUPTI_SUCCESS
788
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
789
+ * \retval CUPTI_ERROR_INVALID_METRIC_ID
790
+ * \retval CUPTI_ERROR_INVALID_OPERATION
791
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the
792
+ * eventIdArray does not contain all the events needed for metric
793
+ * \retval CUPTI_ERROR_INVALID_EVENT_VALUE if any of the
794
+ * event values required for the metric is CUPTI_EVENT_OVERFLOW
795
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if the computed metric value
796
+ * cannot be represented in the metric's value type. For example,
797
+ * if the metric value type is unsigned and the computed metric value is negative
798
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricValue,
799
+ * \p eventIdArray or \p eventValueArray is NULL
800
+ */
801
+ CUptiResult CUPTIAPI cuptiMetricGetValue2(CUpti_MetricID metric,
802
+ size_t eventIdArraySizeBytes,
803
+ CUpti_EventID *eventIdArray,
804
+ size_t eventValueArraySizeBytes,
805
+ uint64_t *eventValueArray,
806
+ size_t propIdArraySizeBytes,
807
+ CUpti_MetricPropertyID *propIdArray,
808
+ size_t propValueArraySizeBytes,
809
+ uint64_t *propValueArray,
810
+ CUpti_MetricValue *metricValue);
811
+
812
+ /** @} */ /* END CUPTI_METRIC_API */
813
+
814
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
815
+ #pragma GCC visibility pop
816
+ #endif
817
+
818
+ #if defined(__cplusplus)
819
+ }
820
+ #endif
821
+
822
+ #endif /*_CUPTI_METRIC_H_*/
823
+
824
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_nvtx_cbid.h ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2013-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
51
+ #pragma GCC visibility push(default)
52
+ #endif
53
+
54
+ typedef enum {
55
+ CUPTI_CBID_NVTX_INVALID = 0,
56
+ CUPTI_CBID_NVTX_nvtxMarkA = 1,
57
+ CUPTI_CBID_NVTX_nvtxMarkW = 2,
58
+ CUPTI_CBID_NVTX_nvtxMarkEx = 3,
59
+ CUPTI_CBID_NVTX_nvtxRangeStartA = 4,
60
+ CUPTI_CBID_NVTX_nvtxRangeStartW = 5,
61
+ CUPTI_CBID_NVTX_nvtxRangeStartEx = 6,
62
+ CUPTI_CBID_NVTX_nvtxRangeEnd = 7,
63
+ CUPTI_CBID_NVTX_nvtxRangePushA = 8,
64
+ CUPTI_CBID_NVTX_nvtxRangePushW = 9,
65
+ CUPTI_CBID_NVTX_nvtxRangePushEx = 10,
66
+ CUPTI_CBID_NVTX_nvtxRangePop = 11,
67
+ CUPTI_CBID_NVTX_nvtxNameCategoryA = 12,
68
+ CUPTI_CBID_NVTX_nvtxNameCategoryW = 13,
69
+ CUPTI_CBID_NVTX_nvtxNameOsThreadA = 14,
70
+ CUPTI_CBID_NVTX_nvtxNameOsThreadW = 15,
71
+ CUPTI_CBID_NVTX_nvtxNameCuDeviceA = 16,
72
+ CUPTI_CBID_NVTX_nvtxNameCuDeviceW = 17,
73
+ CUPTI_CBID_NVTX_nvtxNameCuContextA = 18,
74
+ CUPTI_CBID_NVTX_nvtxNameCuContextW = 19,
75
+ CUPTI_CBID_NVTX_nvtxNameCuStreamA = 20,
76
+ CUPTI_CBID_NVTX_nvtxNameCuStreamW = 21,
77
+ CUPTI_CBID_NVTX_nvtxNameCuEventA = 22,
78
+ CUPTI_CBID_NVTX_nvtxNameCuEventW = 23,
79
+ CUPTI_CBID_NVTX_nvtxNameCudaDeviceA = 24,
80
+ CUPTI_CBID_NVTX_nvtxNameCudaDeviceW = 25,
81
+ CUPTI_CBID_NVTX_nvtxNameCudaStreamA = 26,
82
+ CUPTI_CBID_NVTX_nvtxNameCudaStreamW = 27,
83
+ CUPTI_CBID_NVTX_nvtxNameCudaEventA = 28,
84
+ CUPTI_CBID_NVTX_nvtxNameCudaEventW = 29,
85
+ CUPTI_CBID_NVTX_nvtxDomainMarkEx = 30,
86
+ CUPTI_CBID_NVTX_nvtxDomainRangeStartEx = 31,
87
+ CUPTI_CBID_NVTX_nvtxDomainRangeEnd = 32,
88
+ CUPTI_CBID_NVTX_nvtxDomainRangePushEx = 33,
89
+ CUPTI_CBID_NVTX_nvtxDomainRangePop = 34,
90
+ CUPTI_CBID_NVTX_nvtxDomainResourceCreate = 35,
91
+ CUPTI_CBID_NVTX_nvtxDomainResourceDestroy = 36,
92
+ CUPTI_CBID_NVTX_nvtxDomainNameCategoryA = 37,
93
+ CUPTI_CBID_NVTX_nvtxDomainNameCategoryW = 38,
94
+ CUPTI_CBID_NVTX_nvtxDomainRegisterStringA = 39,
95
+ CUPTI_CBID_NVTX_nvtxDomainRegisterStringW = 40,
96
+ CUPTI_CBID_NVTX_nvtxDomainCreateA = 41,
97
+ CUPTI_CBID_NVTX_nvtxDomainCreateW = 42,
98
+ CUPTI_CBID_NVTX_nvtxDomainDestroy = 43,
99
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserCreate = 44,
100
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserDestroy = 45,
101
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireStart = 46,
102
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireFailed = 47,
103
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireSuccess = 48,
104
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserReleasing = 49,
105
+ CUPTI_CBID_NVTX_SIZE,
106
+ CUPTI_CBID_NVTX_FORCE_INT = 0x7fffffff
107
+ } CUpti_nvtx_api_trace_cbid;
108
+
109
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
110
+ #pragma GCC visibility pop
111
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling.h ADDED
@@ -0,0 +1,936 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2020-2022 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PCSAMPLING_H_)
51
+ #define _CUPTI_PCSAMPLING_H_
52
+
53
+ #include <cuda.h>
54
+ #include <stdint.h>
55
+ #include <stddef.h>
56
+ #include "cupti_result.h"
57
+ #include "cupti_common.h"
58
+
59
+
60
+ #if defined(__cplusplus)
61
+ extern "C" {
62
+ #endif
63
+
64
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
65
+ #pragma GCC visibility push(default)
66
+ #endif
67
+
68
+ /**
69
+ * \defgroup CUPTI_PCSAMPLING_API CUPTI PC Sampling API
70
+ * Functions, types, and enums that implement the CUPTI PC Sampling API.
71
+ * @{
72
+ */
73
+
74
+ #ifndef CUPTI_PCSAMPLING_STRUCT_SIZE
75
+ #define CUPTI_PCSAMPLING_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
76
+ #endif
77
+
78
+ #ifndef CUPTI_STALL_REASON_STRING_SIZE
79
+ #define CUPTI_STALL_REASON_STRING_SIZE 128
80
+ #endif
81
+
82
+ /**
83
+ * \brief PC Sampling collection mode
84
+ */
85
+ typedef enum
86
+ {
87
+ /**
88
+ * INVALID Value
89
+ */
90
+ CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID = 0,
91
+ /**
92
+ * Continuous mode. Kernels are not serialized in this mode.
93
+ */
94
+ CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS = 1,
95
+ /**
96
+ * Serialized mode. Kernels are serialized in this mode.
97
+ */
98
+ CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED = 2,
99
+ } CUpti_PCSamplingCollectionMode;
100
+
101
+ /**
102
+ * \brief PC Sampling stall reasons
103
+ */
104
+ typedef struct PACKED_ALIGNMENT
105
+ {
106
+ /**
107
+ * [r] Collected stall reason index
108
+ */
109
+ uint32_t pcSamplingStallReasonIndex;
110
+ /**
111
+ * [r] Number of times the PC was sampled with the stallReason.
112
+ */
113
+ uint32_t samples;
114
+ } CUpti_PCSamplingStallReason;
115
+
116
+ /**
117
+ * \brief PC Sampling data
118
+ */
119
+ typedef struct PACKED_ALIGNMENT
120
+ {
121
+ /**
122
+ * [w] Size of the data structure.
123
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
124
+ * available in the structure. Used to preserve backward compatibility.
125
+ */
126
+ size_t size;
127
+ /**
128
+ * [r] Unique cubin id
129
+ */
130
+ uint64_t cubinCrc;
131
+ /**
132
+ * [r] PC offset
133
+ */
134
+ uint64_t pcOffset;
135
+ /**
136
+ * The function's unique symbol index in the module.
137
+ */
138
+ uint32_t functionIndex;
139
+ /**
140
+ * Padding
141
+ */
142
+ uint32_t pad;
143
+ /**
144
+ * [r] The function name. This name string might be shared across all the records
145
+ * including records from activity APIs representing the same function, and so it should not be
146
+ * modified or freed until post processing of all the records is done. Once done, it is user’s responsibility to
147
+ * free the memory using free() function.
148
+ */
149
+ char* functionName;
150
+ /**
151
+ * [r] Collected stall reason count
152
+ */
153
+ size_t stallReasonCount;
154
+ /**
155
+ * [r] Stall reason id
156
+ * Total samples
157
+ */
158
+ CUpti_PCSamplingStallReason *stallReason;
159
+ /**
160
+ * The correlation ID of the kernel to which this result is associated. Only valid for serialized mode of pc sampling collection.
161
+ * For continuous mode of collection the correlationId will be set to 0.
162
+ */
163
+ uint32_t correlationId;
164
+ } CUpti_PCSamplingPCData;
165
+
166
+ /**
167
+ * \brief PC Sampling output data format
168
+ */
169
+ typedef enum
170
+ {
171
+ CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_INVALID = 0,
172
+ /**
173
+ * HW buffer data will be parsed during collection of data
174
+ */
175
+ CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED = 1,
176
+ } CUpti_PCSamplingOutputDataFormat;
177
+
178
+ /**
179
+ * \brief Collected PC Sampling data
180
+ *
181
+ */
182
+ typedef struct PACKED_ALIGNMENT
183
+ {
184
+ /**
185
+ * [w] Size of the data structure.
186
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
187
+ * available in the structure. Used to preserve backward compatibility.
188
+ */
189
+ size_t size;
190
+ /**
191
+ * [w] Number of PCs to be collected
192
+ */
193
+ size_t collectNumPcs;
194
+ /**
195
+ * [r] Number of samples collected across all PCs.
196
+ * It includes samples for user modules, samples for non-user kernels and dropped samples.
197
+ * It includes counts for all non selected stall reasons.
198
+ * CUPTI does not provide PC records for non-user kernels.
199
+ * CUPTI does not provide PC records for instructions for which all selected stall reason metrics counts are zero.
200
+ */
201
+ uint64_t totalSamples;
202
+ /**
203
+ * [r] Number of samples that were dropped by hardware due to backpressure/overflow.
204
+ */
205
+ uint64_t droppedSamples;
206
+ /**
207
+ * [r] Number of PCs collected
208
+ */
209
+ size_t totalNumPcs;
210
+ /**
211
+ * [r] Number of PCs available for collection
212
+ */
213
+ size_t remainingNumPcs;
214
+ /**
215
+ * [r] Unique identifier for each range.
216
+ * Data collected across multiple ranges in multiple buffers can be identified using range id.
217
+ */
218
+ uint64_t rangeId;
219
+ /**
220
+ * [r] Profiled PC data
221
+ * This data struct should have enough memory to collect number of PCs mentioned in \brief collectNumPcs
222
+ */
223
+ CUpti_PCSamplingPCData *pPcData;
224
+ /**
225
+ * [r] Number of samples collected across all non user kernels PCs.
226
+ * It includes samples for non-user kernels.
227
+ * It includes counts for all non selected stall reasons as well.
228
+ * CUPTI does not provide PC records for non-user kernels.
229
+ */
230
+ uint64_t nonUsrKernelsTotalSamples;
231
+
232
+ /**
233
+ * [r] Status of the hardware buffer.
234
+ * CUPTI returns the error code CUPTI_ERROR_OUT_OF_MEMORY when hardware buffer is full.
235
+ * When hardware buffer is full, user will get pc data as 0. To mitigate this issue, one or more of the below options can be tried:
236
+ * 1. Increase the hardware buffer size using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
237
+ * 2. Decrease the thread sleep span using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN
238
+ * 3. Decrease the sampling frequency using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
239
+ */
240
+ uint8_t hardwareBufferFull;
241
+ } CUpti_PCSamplingData;
242
+
243
+ /**
244
+ * \brief PC Sampling configuration attributes
245
+ *
246
+ * PC Sampling configuration attribute types. These attributes can be read
247
+ * using \ref cuptiPCSamplingGetConfigurationAttribute and can be written
248
+ * using \ref cuptiPCSamplingSetConfigurationAttribute. Attributes marked
249
+ * [r] can only be read using \ref cuptiPCSamplingGetConfigurationAttribute
250
+ * [w] can only be written using \ref cuptiPCSamplingSetConfigurationAttribute
251
+ * [rw] can be read using \ref cuptiPCSamplingGetConfigurationAttribute and
252
+ * written using \ref cuptiPCSamplingSetConfigurationAttribute
253
+ */
254
+ typedef enum
255
+ {
256
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_INVALID = 0,
257
+ /**
258
+ * [rw] Sampling period for PC Sampling.
259
+ * DEFAULT - CUPTI defined value based on number of SMs
260
+ * Valid values for the sampling
261
+ * periods are between 5 to 31 both inclusive. This will set the
262
+ * sampling period to (2^samplingPeriod) cycles.
263
+ * For e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31
264
+ * Value is a uint32_t
265
+ */
266
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD = 1,
267
+ /**
268
+ * [w] Number of stall reasons to collect.
269
+ * DEFAULT - All stall reasons will be collected
270
+ * Value is a size_t
271
+ * [w] Stall reasons to collect
272
+ * DEFAULT - All stall reasons will be collected
273
+ * Input value should be a pointer pointing to array of stall reason indexes
274
+ * containing all the stall reason indexes to collect.
275
+ */
276
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON = 2,
277
+ /**
278
+ * [rw] Size of SW buffer for raw PC counter data downloaded from HW buffer
279
+ * DEFAULT - 1 MB, which can accommodate approximately 5500 PCs
280
+ * with all stall reasons
281
+ * Approximately it takes 16 Bytes (and some fixed size memory)
282
+ * to accommodate one PC with one stall reason
283
+ * For e.g. 1 PC with 1 stall reason = 32 Bytes
284
+ * 1 PC with 2 stall reason = 48 Bytes
285
+ * 1 PC with 4 stall reason = 96 Bytes
286
+ * Value is a size_t
287
+ */
288
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE = 3,
289
+ /**
290
+ * [rw] Size of HW buffer in bytes
291
+ * DEFAULT - 512 MB
292
+ * If sampling period is too low, HW buffer can overflow
293
+ * and drop PC data
294
+ * Value is a size_t
295
+ */
296
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE = 4,
297
+ /**
298
+ * [rw] PC Sampling collection mode
299
+ * DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS
300
+ * Input value should be of type \ref CUpti_PCSamplingCollectionMode.
301
+ */
302
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE = 5,
303
+ /**
304
+ * [rw] Control over PC Sampling data collection range
305
+ * Default - 0
306
+ * 1 - Allows user to start and stop PC Sampling using APIs -
307
+ * \ref cuptiPCSamplingStart() - Start PC Sampling
308
+ * \ref cuptiPCSamplingStop() - Stop PC Sampling
309
+ * Value is a uint32_t
310
+ */
311
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL = 6,
312
+ /**
313
+ * [w] Value for output data format
314
+ * Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED
315
+ * Input value should be of type \ref CUpti_PCSamplingOutputDataFormat.
316
+ */
317
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT = 7,
318
+ /**
319
+ * [w] Data buffer to hold collected PC Sampling data PARSED_DATA
320
+ * Default - none.
321
+ * Buffer type is void * which can point to PARSED_DATA
322
+ * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
323
+ */
324
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER = 8,
325
+ /**
326
+ * [rw] Control sleep time of the worker threads created by CUPTI for various PC sampling operations.
327
+ * CUPTI creates multiple worker threads to offload certain operations to these threads. This includes decoding of HW data to
328
+ * the CUPTI PC sampling data and correlating PC data to SASS instructions. CUPTI wakes up these threads periodically.
329
+ * Default - 100 milliseconds.
330
+ * Value is a uint32_t
331
+ */
332
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN = 9,
333
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_FORCE_INT = 0x7fffffff,
334
+ } CUpti_PCSamplingConfigurationAttributeType;
335
+
336
+ /**
337
+ * \brief PC sampling configuration information structure
338
+ *
339
+ * This structure provides \ref CUpti_PCSamplingConfigurationAttributeType which can be configured
340
+ * or queried for PC sampling configuration
341
+ */
342
+ typedef struct
343
+ {
344
+ /**
345
+ * Refer \ref CUpti_PCSamplingConfigurationAttributeType for all supported attribute types
346
+ */
347
+ CUpti_PCSamplingConfigurationAttributeType attributeType;
348
+ /*
349
+ * Configure or query status for \p attributeType
350
+ * CUPTI_SUCCESS for valid \p attributeType and \p attributeData
351
+ * CUPTI_ERROR_INVALID_OPERATION if \p attributeData is not valid
352
+ * CUPTI_ERROR_INVALID_PARAMETER if \p attributeType is not valid
353
+ */
354
+ CUptiResult attributeStatus;
355
+ union
356
+ {
357
+ /**
358
+ * Invalid Value
359
+ */
360
+ struct
361
+ {
362
+ uint64_t data[3];
363
+ } invalidData;
364
+ /**
365
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
366
+ */
367
+ struct
368
+ {
369
+ uint32_t samplingPeriod;
370
+ } samplingPeriodData;
371
+ /**
372
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON
373
+ */
374
+ struct
375
+ {
376
+ size_t stallReasonCount;
377
+ uint32_t *pStallReasonIndex;
378
+ } stallReasonData;
379
+ /**
380
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE
381
+ */
382
+ struct
383
+ {
384
+ size_t scratchBufferSize;
385
+ } scratchBufferSizeData;
386
+ /**
387
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
388
+ */
389
+ struct
390
+ {
391
+ size_t hardwareBufferSize;
392
+ } hardwareBufferSizeData;
393
+ /**
394
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE
395
+ */
396
+ struct
397
+ {
398
+ CUpti_PCSamplingCollectionMode collectionMode;
399
+ } collectionModeData;
400
+ /**
401
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
402
+ */
403
+ struct
404
+ {
405
+ uint32_t enableStartStopControl;
406
+ } enableStartStopControlData;
407
+ /**
408
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT
409
+ */
410
+ struct
411
+ {
412
+ CUpti_PCSamplingOutputDataFormat outputDataFormat;
413
+ } outputDataFormatData;
414
+ /**
415
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER
416
+ */
417
+ struct
418
+ {
419
+ void *samplingDataBuffer;
420
+ } samplingDataBufferData;
421
+ /**
422
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN
423
+ */
424
+ struct
425
+ {
426
+ uint32_t workerThreadPeriodicSleepSpan;
427
+ } workerThreadPeriodicSleepSpanData;
428
+
429
+ } attributeData;
430
+ } CUpti_PCSamplingConfigurationInfo;
431
+
432
+ /**
433
+ * \brief PC sampling configuration structure
434
+ *
435
+ * This structure configures PC sampling using \ref cuptiPCSamplingSetConfigurationAttribute
436
+ * and queries PC sampling default configuration using \ref cuptiPCSamplingGetConfigurationAttribute
437
+ */
438
+ typedef struct
439
+ {
440
+ /**
441
+ * [w] Size of the data structure i.e. CUpti_PCSamplingConfigurationInfoParamsSize
442
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
443
+ * available in the structure. Used to preserve backward compatibility.
444
+ */
445
+ size_t size;
446
+ /**
447
+ * [w] Assign to NULL
448
+ */
449
+ void* pPriv;
450
+ /**
451
+ * [w] CUcontext
452
+ */
453
+ CUcontext ctx;
454
+ /**
455
+ * [w] Number of attributes to configure using \ref cuptiPCSamplingSetConfigurationAttribute or query
456
+ * using \ref cuptiPCSamplingGetConfigurationAttribute
457
+ */
458
+ size_t numAttributes;
459
+ /**
460
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
461
+ */
462
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
463
+ } CUpti_PCSamplingConfigurationInfoParams;
464
+ #define CUpti_PCSamplingConfigurationInfoParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingConfigurationInfoParams,pPCSamplingConfigurationInfo)
465
+
466
+ /**
467
+ * \brief Write PC Sampling configuration attribute.
468
+ *
469
+ * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
470
+ * containing PC sampling configuration.
471
+ *
472
+ * \retval CUPTI_SUCCESS
473
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
474
+ * some invalid \p attrib.
475
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if attribute \p value is not valid
476
+ * or any \p pParams is not valid
477
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
478
+ * does not support the API
479
+ */
480
+ CUptiResult CUPTIAPI cuptiPCSamplingSetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);
481
+
482
+ /**
483
+ * \brief Read PC Sampling configuration attribute.
484
+ *
485
+ * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
486
+ * containing PC sampling configuration.
487
+ *
488
+ * \retval CUPTI_SUCCESS
489
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
490
+ * some invalid attribute.
491
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p attrib is not valid
492
+ * or any \p pParams is not valid
493
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT indicates that
494
+ * the \p value buffer is too small to hold the attribute value
495
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
496
+ * does not support the API
497
+ */
498
+ CUptiResult CUPTIAPI cuptiPCSamplingGetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);
499
+
500
+ /**
501
+ * \brief Params for cuptiPCSamplingGetData
502
+ */
503
+ typedef struct
504
+ {
505
+ /**
506
+ * [w] Size of the data structure i.e. CUpti_PCSamplingGetDataParamsSize
507
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
508
+ * available in the structure. Used to preserve backward compatibility.
509
+ */
510
+ size_t size;
511
+ /**
512
+ * [w] Assign to NULL
513
+ */
514
+ void* pPriv;
515
+ /**
516
+ * [w] CUcontext
517
+ */
518
+ CUcontext ctx;
519
+ /**
520
+ * \param pcSamplingData Data buffer to hold collected PC Sampling data PARSED_DATA
521
+ * Buffer type is void * which can point to PARSED_DATA
522
+ * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
523
+ */
524
+ void *pcSamplingData;
525
+ } CUpti_PCSamplingGetDataParams;
526
+ #define CUpti_PCSamplingGetDataParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetDataParams, pcSamplingData)
527
+ /**
528
+ * \brief Flush GPU PC sampling data periodically.
529
+ *
530
+ * Flushing of GPU PC Sampling data is required at following point to maintain uniqueness of PCs:
531
+ * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, after every module load-unload-load
532
+ * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends
533
+ * If configuration option \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
534
+ * is enabled, then after every range end i.e. \brief cuptiPCSamplingStop()
535
+ *
536
+ * If application is profiled in \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, with disabled
537
+ * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL, and there is no module unload,
538
+ * user can collect data in two ways:
539
+ * Use \brief cuptiPCSamplingGetData() API periodically
540
+ * Use \brief cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from sampling
541
+ * data buffer passed during configuration.
542
+ * Note: In case, \brief cuptiPCSamplingGetData() API is not called periodically, then sampling data buffer
543
+ * passed during configuration should be large enough to hold all PCs data.
544
+ * \brief cuptiPCSamplingGetData() API never does device synchronization.
545
+ * It is possible that when the API is called there is some unconsumed data from the HW buffer. In this case
546
+ * CUPTI provides only the data available with it at that moment.
547
+ *
548
+ * \param pParams A pointer to \ref CUpti_PCSamplingGetDataParams
549
+ *
550
+ * \retval CUPTI_SUCCESS
551
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called without
552
+ * enabling PC sampling.
553
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
554
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
555
+ * does not support the API
556
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY indicates that the HW buffer is full
557
+ */
558
+ CUptiResult CUPTIAPI cuptiPCSamplingGetData(CUpti_PCSamplingGetDataParams *pParams);
559
+
560
+ /**
561
+ * \brief Params for cuptiPCSamplingEnable
562
+ */
563
+ typedef struct
564
+ {
565
+ /**
566
+ * [w] Size of the data structure i.e. CUpti_PCSamplingEnableParamsSize
567
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
568
+ * available in the structure. Used to preserve backward compatibility.
569
+ */
570
+ size_t size;
571
+ /**
572
+ * [w] Assign to NULL
573
+ */
574
+ void* pPriv;
575
+ /**
576
+ * [w] CUcontext
577
+ */
578
+ CUcontext ctx;
579
+ } CUpti_PCSamplingEnableParams;
580
+ #define CUpti_PCSamplingEnableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingEnableParams, ctx)
581
+
582
+ /**
583
+ * \brief Enable PC sampling.
584
+ *
585
+ * \param pParams A pointer to \ref CUpti_PCSamplingEnableParams
586
+ *
587
+ * \retval CUPTI_SUCCESS
588
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
589
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
590
+ * does not support the API
591
+ */
592
+ CUptiResult CUPTIAPI cuptiPCSamplingEnable(CUpti_PCSamplingEnableParams *pParams);
593
+
594
+ /**
595
+ * \brief Params for cuptiPCSamplingDisable
596
+ */
597
+ typedef struct
598
+ {
599
+ /**
600
+ * [w] Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize
601
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
602
+ * available in the structure. Used to preserve backward compatibility.
603
+ */
604
+ size_t size;
605
+ /**
606
+ * [w] Assign to NULL
607
+ */
608
+ void* pPriv;
609
+ /**
610
+ * [w] CUcontext
611
+ */
612
+ CUcontext ctx;
613
+ } CUpti_PCSamplingDisableParams;
614
+ #define CUpti_PCSamplingDisableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingDisableParams, ctx)
615
+
616
+ /**
617
+ * \brief Disable PC sampling.
618
+ *
619
+ * For application which doesn't destroy the CUDA context explicitly,
620
+ * this API does the PC Sampling tear-down, joins threads and copies PC records in the buffer provided
621
+ * during the PC sampling configuration. PC records which can't be accommodated in the buffer are discarded.
622
+ *
623
+ * \param pParams A pointer to \ref CUpti_PCSamplingDisableParams
624
+ *
625
+ * \retval CUPTI_SUCCESS
626
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
627
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
628
+ * does not support the API
629
+ */
630
+ CUptiResult CUPTIAPI cuptiPCSamplingDisable(CUpti_PCSamplingDisableParams *pParams);
631
+
632
+ /**
633
+ * \brief Params for cuptiPCSamplingStart
634
+ */
635
+ typedef struct
636
+ {
637
+ /**
638
+ * [w] Size of the data structure i.e. CUpti_PCSamplingStartParamsSize
639
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
640
+ * available in the structure. Used to preserve backward compatibility.
641
+ */
642
+ size_t size;
643
+ /**
644
+ * [w] Assign to NULL
645
+ */
646
+ void* pPriv;
647
+ /**
648
+ * [w] CUcontext
649
+ */
650
+ CUcontext ctx;
651
+ } CUpti_PCSamplingStartParams;
652
+ #define CUpti_PCSamplingStartParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStartParams, ctx)
653
+
654
+ /**
655
+ * \brief Start PC sampling.
656
+ *
657
+ * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
658
+ * This API can be used to mark starting of range. Set configuration option
659
+ * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
660
+ *
661
+ * \param pParams A pointer to \ref CUpti_PCSamplingStartParams
662
+ *
663
+ * \retval CUPTI_SUCCESS
664
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
665
+ * incorrect PC Sampling configuration.
666
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
667
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
668
+ * does not support the API
669
+ */
670
+ CUptiResult CUPTIAPI cuptiPCSamplingStart(CUpti_PCSamplingStartParams *pParams);
671
+
672
+ /**
673
+ * \brief Params for cuptiPCSamplingStop
674
+ */
675
+ typedef struct
676
+ {
677
+ /**
678
+ * [w] Size of the data structure i.e. CUpti_PCSamplingStopParamsSize
679
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
680
+ * available in the structure. Used to preserve backward compatibility.
681
+ */
682
+ size_t size;
683
+ /**
684
+ * [w] Assign to NULL
685
+ */
686
+ void* pPriv;
687
+ /**
688
+ * [w] CUcontext
689
+ */
690
+ CUcontext ctx;
691
+ } CUpti_PCSamplingStopParams;
692
+ #define CUpti_PCSamplingStopParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStopParams, ctx)
693
+
694
+ /**
695
+ * \brief Stop PC sampling.
696
+ *
697
+ * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
698
+ * This API can be used to mark end of range. Set configuration option
699
+ * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
700
+ *
701
+ * \param pParams A pointer to \ref CUpti_PCSamplingStopParams
702
+ *
703
+ * \retval CUPTI_SUCCESS
704
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
705
+ * incorrect PC Sampling configuration.
706
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
707
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
708
+ * does not support the API
709
+ */
710
+ CUptiResult CUPTIAPI cuptiPCSamplingStop(CUpti_PCSamplingStopParams *pParams);
711
+
712
+ /**
713
+ * \brief Params for cuptiPCSamplingGetNumStallReasons
714
+ */
715
+ typedef struct
716
+ {
717
+ /**
718
+ * [w] Size of the data structure i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize
719
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
720
+ * available in the structure. Used to preserve backward compatibility.
721
+ */
722
+ size_t size;
723
+ /**
724
+ * [w] Assign to NULL
725
+ */
726
+ void* pPriv;
727
+ /**
728
+ * [w] CUcontext
729
+ */
730
+ CUcontext ctx;
731
+ /**
732
+ * [r] Number of stall reasons
733
+ */
734
+ size_t *numStallReasons;
735
+ } CUpti_PCSamplingGetNumStallReasonsParams;
736
+ #define CUpti_PCSamplingGetNumStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetNumStallReasonsParams, numStallReasons)
737
+
738
+ /**
739
+ * \brief Get PC sampling stall reason count.
740
+ *
741
+ * \param pParams A pointer to \ref CUpti_PCSamplingGetNumStallReasonsParams
742
+ *
743
+ * \retval CUPTI_SUCCESS
744
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
745
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
746
+ * does not support the API
747
+ */
748
+ CUptiResult CUPTIAPI cuptiPCSamplingGetNumStallReasons(CUpti_PCSamplingGetNumStallReasonsParams *pParams);
749
+
750
+ /**
751
+ * \brief Params for cuptiPCSamplingGetStallReasons
752
+ */
753
+ typedef struct
754
+ {
755
+ /**
756
+ * [w] Size of the data structure i.e. CUpti_PCSamplingGetStallReasonsParamsSize
757
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
758
+ * available in the structure. Used to preserve backward compatibility.
759
+ */
760
+ size_t size;
761
+ /**
762
+ * [w] Assign to NULL
763
+ */
764
+ void* pPriv;
765
+ /**
766
+ * [w] CUcontext
767
+ */
768
+ CUcontext ctx;
769
+ /**
770
+ * [w] Number of stall reasons
771
+ */
772
+ size_t numStallReasons;
773
+ /**
774
+ * [r] Stall reason index
775
+ */
776
+ uint32_t *stallReasonIndex;
777
+ /**
778
+ * [r] Stall reasons name
779
+ */
780
+ char **stallReasons;
781
+ } CUpti_PCSamplingGetStallReasonsParams;
782
+ #define CUpti_PCSamplingGetStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetStallReasonsParams, stallReasons)
783
+
784
+ /**
785
+ * \brief Get PC sampling stall reasons.
786
+ *
787
+ * \param pParams A pointer to \ref CUpti_PCSamplingGetStallReasonsParams
788
+ *
789
+ * \retval CUPTI_SUCCESS
790
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
791
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
792
+ * does not support the API
793
+ */
794
+ CUptiResult CUPTIAPI cuptiPCSamplingGetStallReasons(CUpti_PCSamplingGetStallReasonsParams *pParams);
795
+
796
+
797
+ /**
798
+ * \brief Params for cuptiGetSassToSourceCorrelation
799
+ */
800
+ typedef struct CUpti_GetSassToSourceCorrelationParams {
801
+ /**
802
+ * [w] Size of the data structure i.e. CUpti_GetSassToSourceCorrelationParamsSize
803
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
804
+ * available in the structure. Used to preserve backward compatibility.
805
+ */
806
+ size_t size;
807
+ /**
808
+ * [w] Pointer to cubin binary where function belongs.
809
+ */
810
+ const void* cubin;
811
+ /**
812
+ * [w] Function name to which PC belongs.
813
+ */
814
+ const char *functionName;
815
+ /**
816
+ * [w] Size of cubin binary.
817
+ */
818
+ size_t cubinSize;
819
+ /**
820
+ * [r] Line number in the source code.
821
+ */
822
+ uint32_t lineNumber;
823
+ /**
824
+ * [w] PC offset
825
+ */
826
+ uint64_t pcOffset;
827
+ /**
828
+ * [r] Path for the source file.
829
+ */
830
+ char *fileName;
831
+ /**
832
+ * [r] Path for the directory of source file.
833
+ */
834
+ char *dirName;
835
+ } CUpti_GetSassToSourceCorrelationParams;
836
+
837
+ #define CUpti_GetSassToSourceCorrelationParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetSassToSourceCorrelationParams, dirName)
838
+
839
+ /**
840
+ * \brief SASS to Source correlation.
841
+ *
842
+ * \param pParams A pointer to \ref CUpti_GetSassToSourceCorrelationParams
843
+ *
844
+ * It is expected from user to free allocated memory for fileName and dirName after use.
845
+ *
846
+ * \retval CUPTI_SUCCESS
847
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if either of the parameters cubin or functionName
848
+ * is NULL or cubinSize is zero or size field is not set correctly.
849
+ * \retval CUPTI_ERROR_INVALID_MODULE provided cubin is invalid.
850
+ * \retval CUPTI_ERROR_UNKNOWN an internal error occurred.
851
+ * This error code is also used for cases when the function is not present in the module.
852
+ * A better error code will be returned in the future release.
853
+ */
854
+ CUptiResult CUPTIAPI cuptiGetSassToSourceCorrelation(CUpti_GetSassToSourceCorrelationParams *pParams);
855
+
856
+ /**
857
+ * \brief Params for cuptiGetCubinCrc
858
+ */
859
+ typedef struct {
860
+ /**
861
+ * [w] Size of configuration structure.
862
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
863
+ * available in the structure. Used to preserve backward compatibility.
864
+ */
865
+ size_t size;
866
+ /**
867
+ * [w] Size of cubin binary.
868
+ */
869
+ size_t cubinSize;
870
+ /**
871
+ * [w] Pointer to cubin binary
872
+ */
873
+ const void* cubin;
874
+ /**
875
+ * [r] Computed CRC will be stored in it.
876
+ */
877
+ uint64_t cubinCrc;
878
+ } CUpti_GetCubinCrcParams;
879
+ #define CUpti_GetCubinCrcParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetCubinCrcParams, cubinCrc)
880
+
881
+ /**
882
+ * \brief Get the CRC of cubin.
883
+ *
884
+ * This function returns the CRC of provided cubin binary.
885
+ *
886
+ * \param pParams A pointer to \ref CUpti_GetCubinCrcParams
887
+ *
888
+ * \retval CUPTI_SUCCESS
889
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if parameter cubin is NULL or
890
+ * provided cubinSize is zero or size field is not set.
891
+ */
892
+ CUptiResult CUPTIAPI cuptiGetCubinCrc(CUpti_GetCubinCrcParams *pParams);
893
+
894
+ /**
895
+ * \brief Function type for callback used by CUPTI to request crc of
896
+ * loaded module.
897
+ *
898
+ * This callback function asks for the crc of the provided module.
899
+ * The provided crc will be stored in PC sampling records i.e. in the field 'cubinCrc' of the PC sampling
900
+ * struct CUpti_PCSamplingPCData. The CRC is used during the offline source correlation to uniquely identify the module.
901
+ *
902
+ * \param cubin The pointer to cubin binary
903
+ * \param cubinSize The size of cubin binary.
904
+ * \param cubinCrc Returns the computed crc of cubin.
905
+ */
906
+ typedef void (CUPTIAPI *CUpti_ComputeCrcCallbackFunc)(
907
+ const void* cubin,
908
+ size_t cubinSize,
909
+ uint64_t *cubinCrc);
910
+
911
+ /**
912
+ * \brief Register callback function with CUPTI to use
913
+ * your own algorithm to compute cubin crc.
914
+ *
915
+ * This function registers a callback function and it gets called
916
+ * from CUPTI when a CUDA module is loaded.
917
+ *
918
+ * \param funcComputeCubinCrc callback is invoked when a CUDA module
919
+ * is loaded.
920
+ *
921
+ * \retval CUPTI_SUCCESS
922
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p funcComputeCubinCrc is NULL.
923
+ */
924
+ CUptiResult CUPTIAPI cuptiRegisterComputeCrcCallback(CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc);
925
+
926
+ /** @} */ /* END CUPTI_PCSAMPLING_API */
927
+
928
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
929
+ #pragma GCC visibility pop
930
+ #endif
931
+
932
+ #if defined(__cplusplus)
933
+ }
934
+ #endif
935
+
936
+ #endif /*_CUPTI_PCSAMPLING_H_*/
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #if !defined(_CUPTI_PCSAMPLING_UTIL_H_)
2
+ #define _CUPTI_PCSAMPLING_UTIL_H_
3
+
4
+ #include <cupti_pcsampling.h>
5
+ #include <fstream>
6
+
7
+ #include <cupti_common.h>
8
+
9
+ #ifndef CUPTI_UTIL_STRUCT_SIZE
10
+ #define CUPTI_UTIL_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
11
+ #endif
12
+
13
+ #ifndef CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS
14
+ #define CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(type, member, structSize) \
15
+ (offsetof(type, member) < structSize)
16
+ #endif
17
+
18
+ #if defined(__cplusplus)
19
+ extern "C" {
20
+ #endif
21
+
22
+ #if defined(__GNUC__)
23
+ #pragma GCC visibility push(default)
24
+ #endif
25
+
26
+ namespace CUPTI { namespace PcSamplingUtil {
27
+
28
+ /**
29
+ * \defgroup CUPTI_PCSAMPLING_UTILITY CUPTI PC Sampling Utility API
30
+ * Functions, types, and enums that implement the CUPTI PC Sampling Utility API.
31
+ * @{
32
+ */
33
+
34
+ /**
35
+ * \brief Header info will be stored in file.
36
+ */
37
+ typedef struct PACKED_ALIGNMENT {
38
+ /**
39
+ * Version of file format.
40
+ */
41
+ uint32_t version;
42
+ /**
43
+ * Total number of buffers present in the file.
44
+ */
45
+ uint32_t totalBuffers;
46
+ } Header;
47
+
48
+ /**
49
+ * \brief BufferInfo will be stored in the file for every buffer
50
+ * i.e. for every call of the CuptiUtilPutPcSampData() API.
51
+ */
52
+ typedef struct PACKED_ALIGNMENT {
53
+ /**
54
+ * Total number of PC records.
55
+ */
56
+ uint64_t recordCount;
57
+ /**
58
+ * Count of all stall reasons supported on the GPU
59
+ */
60
+ size_t numStallReasons;
61
+ /**
62
+ * Total number of stall reasons in single record.
63
+ */
64
+ uint64_t numSelectedStallReasons;
65
+ /**
66
+ * Buffer size in Bytes.
67
+ */
68
+ uint64_t bufferByteSize;
69
+ } BufferInfo;
70
+
71
+ /**
72
+ * \brief All available stall reasons name and respective indexes
73
+ * will be stored in it.
74
+ */
75
+ typedef struct PACKED_ALIGNMENT {
76
+ /**
77
+ * Number of all available stall reasons
78
+ */
79
+ size_t numStallReasons;
80
+ /**
81
+ * Stall reasons names of all available stall reasons
82
+ */
83
+ char **stallReasons;
84
+ /**
85
+ * Stall reason index of all available stall reasons
86
+ */
87
+ uint32_t *stallReasonIndex;
88
+ } PcSamplingStallReasons;
89
+
90
+ /**
91
+ * \brief CUPTI PC sampling buffer types.
92
+ *
93
+ */
94
+ typedef enum {
95
+ /**
96
+ * Invalid buffer type.
97
+ */
98
+ PC_SAMPLING_BUFFER_INVALID = 0,
99
+ /**
100
+ * Refers to CUpti_PCSamplingData buffer.
101
+ */
102
+ PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA = 1
103
+ } PcSamplingBufferType;
104
+
105
+ /**
106
+ * \brief CUPTI PC sampling utility API result codes.
107
+ *
108
+ * Error and result codes returned by CUPTI PC sampling utility API.
109
+ */
110
+ typedef enum {
111
+ /**
112
+ * No error
113
+ */
114
+ CUPTI_UTIL_SUCCESS = 0,
115
+ /**
116
+ * One or more of the parameters are invalid.
117
+ */
118
+ CUPTI_UTIL_ERROR_INVALID_PARAMETER = 1,
119
+ /**
120
+ * Unable to create a new file
121
+ */
122
+ CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE = 2,
123
+ /**
124
+ * Unable to open a file
125
+ */
126
+ CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE = 3,
127
+ /**
128
+ * Read or write operation failed
129
+ */
130
+ CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED = 4,
131
+ /**
132
+ * Provided file handle is corrupted.
133
+ */
134
+ CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED = 5,
135
+ /**
136
+ * seek operation failed.
137
+ */
138
+ CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED = 6,
139
+ /**
140
+ * Unable to allocate enough memory to perform the requested
141
+ * operation.
142
+ */
143
+ CUPTI_UTIL_ERROR_OUT_OF_MEMORY = 7,
144
+ /**
145
+ * An unknown internal error has occurred.
146
+ */
147
+ CUPTI_UTIL_ERROR_UNKNOWN = 999,
148
+ CUPTI_UTIL_ERROR_FORCE_INT = 0x7fffffff
149
+ } CUptiUtilResult;
150
+
151
+ /**
152
+ * \brief Params for \ref CuptiUtilPutPcSampData
153
+ */
154
+ typedef struct {
155
+ /**
156
+ * Size of the data structure i.e. CUptiUtil_PutPcSampDataParamsSize
157
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
158
+ * available in the structure. Used to preserve backward compatibility.
159
+ */
160
+ size_t size;
161
+ /**
162
+ * Type of buffer to store in file
163
+ */
164
+ PcSamplingBufferType bufferType;
165
+ /**
166
+ * PC sampling buffer.
167
+ */
168
+ void *pSamplingData;
169
+ /**
170
+ * Number of configured attributes
171
+ */
172
+ size_t numAttributes;
173
+ /**
174
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
175
+ * It is expected to provide configuration details of at least
176
+ * CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute.
177
+ */
178
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
179
+ /**
180
+ * Refer \ref PcSamplingStallReasons.
181
+ */
182
+ PcSamplingStallReasons *pPcSamplingStallReasons;
183
+ /**
184
+ * File name to store buffer into it.
185
+ */
186
+ const char* fileName;
187
+ } CUptiUtil_PutPcSampDataParams;
188
+ #define CUptiUtil_PutPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_PutPcSampDataParams, fileName)
189
+
190
+ /**
191
+ * \brief Dump PC sampling data into the file.
192
+ *
193
+ * This API can be called multiple times.
194
+ * It will append buffer in the file.
195
+ * For every buffer it will store BufferInfo
196
+ * so that before retrieving data it will help to allocate buffer
197
+ * to store retrieved data.
198
+ * This API creates the file if it is not present.
199
+ * If stallReasonIndex or stallReasons pointer of \ref CUptiUtil_PutPcSampDataParams is NULL
200
+ * then stall reasons data will not be stored in file.
201
+ * It is expected to store all available stall reason data at least once to refer it during
202
+ * offline correlation.
203
+ *
204
+ * \retval CUPTI_UTIL_SUCCESS
205
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
206
+ * or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details not provided
207
+ * or filename is empty.
208
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE
209
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE
210
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
211
+ */
212
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilPutPcSampData(CUptiUtil_PutPcSampDataParams *pParams);
213
+
214
+ /**
215
+ * \brief Params for \ref CuptiUtilGetHeaderData
216
+ */
217
+ typedef struct {
218
+ /**
219
+ * Size of the data structure i.e. CUptiUtil_GetHeaderDataParamsSize
220
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
221
+ * available in the structure. Used to preserve backward compatibility.
222
+ */
223
+ size_t size;
224
+ /**
225
+ * File handle.
226
+ */
227
+ std::ifstream *fileHandler;
228
+ /**
229
+ * Header Info.
230
+ */
231
+ Header headerInfo;
232
+
233
+ } CUptiUtil_GetHeaderDataParams;
234
+ #define CUptiUtil_GetHeaderDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetHeaderDataParams, headerInfo)
235
+
236
+ /**
237
+ * \brief Get header data of file.
238
+ *
239
+ * This API must be called once initially while retrieving data from file.
240
+ * \ref Header structure, it gives info about total number
241
+ * of buffers present in the file.
242
+ *
243
+ * \retval CUPTI_UTIL_SUCCESS
244
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParam or fileHandle is NULL or param struct size is incorrect.
245
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file
246
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
247
+ */
248
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilGetHeaderData(CUptiUtil_GetHeaderDataParams *pParams);
249
+
250
+ /**
251
+ * \brief Params for \ref CuptiUtilGetBufferInfo
252
+ */
253
+ typedef struct {
254
+ /**
255
+ * Size of the data structure i.e. CUptiUtil_GetBufferInfoParamsSize
256
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
257
+ * available in the structure. Used to preserve backward compatibility.
258
+ */
259
+ size_t size;
260
+ /**
261
+ * File handle.
262
+ */
263
+ std::ifstream *fileHandler;
264
+ /**
265
+ * Buffer Info.
266
+ */
267
+ BufferInfo bufferInfoData;
268
+ } CUptiUtil_GetBufferInfoParams;
269
+ #define CUptiUtil_GetBufferInfoParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetBufferInfoParams, bufferInfoData)
270
+
271
+ /**
272
+ * \brief Get buffer info data of file.
273
+ *
274
+ * This API must be called every time before calling CuptiUtilGetPcSampData API.
275
+ * \ref BufferInfo structure, it gives info about recordCount and numSelectedStallReasons
276
+ * of every record in the buffer. This will help to allocate exact buffer to retrieve data into it.
277
+ *
278
+ * \retval CUPTI_UTIL_SUCCESS
279
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParam or fileHandle is NULL or param struct size is incorrect.
280
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
281
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
282
+ */
283
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilGetBufferInfo(CUptiUtil_GetBufferInfoParams *pParams);
284
+
285
+ /**
286
+ * \brief Params for \ref CuptiUtilGetPcSampData
287
+ */
288
+ typedef struct {
289
+ /**
290
+ * Size of the data structure i.e. CUptiUtil_GetPcSampDataParamsSize
291
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
292
+ * available in the structure. Used to preserve backward compatibility.
293
+ */
294
+ size_t size;
295
+ /**
296
+ * File handle.
297
+ */
298
+ std::ifstream *fileHandler;
299
+ /**
300
+ * Type of buffer to store in file
301
+ */
302
+ PcSamplingBufferType bufferType;
303
+ /**
304
+ * Pointer to collected buffer info using \ref CuptiUtilGetBufferInfo
305
+ */
306
+ BufferInfo *pBufferInfoData;
307
+ /**
308
+ * Pointer to allocated memory to store retrieved data from file.
309
+ */
310
+ void *pSamplingData;
311
+ /**
312
+ * Number of configuration attributes
313
+ */
314
+ size_t numAttributes;
315
+ /**
316
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
317
+ */
318
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
319
+ /**
320
+ * Refer \ref PcSamplingStallReasons.
321
+ * For stallReasons field of \ref PcSamplingStallReasons it is expected to
322
+ * allocate memory for each string element of array.
323
+ */
324
+ PcSamplingStallReasons *pPcSamplingStallReasons;
325
+ } CUptiUtil_GetPcSampDataParams;
326
+ #define CUptiUtil_GetPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetPcSampDataParams, pPcSamplingStallReasons)
327
+
328
+ /**
329
+ * \brief Retrieve PC sampling data from file into allocated buffer.
330
+ *
331
+ * This API must be called after CuptiUtilGetBufferInfo API.
332
+ * It will retrieve data from file into allocated buffer.
333
+ *
334
+ * \retval CUPTI_UTIL_SUCCESS
335
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
336
+ * or if either of pSamplingData, pParams is NULL. If pPcSamplingStallReasons is not NULL then
337
+ * error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL.
338
+ * or filename is empty.
339
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
340
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
341
+ */
342
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilGetPcSampData(CUptiUtil_GetPcSampDataParams *pParams);
343
+
344
+ /**
345
+ * \brief Params for \ref CuptiUtilMergePcSampData
346
+ */
347
+ typedef struct
348
+ {
349
+ /**
350
+ * Size of the data structure i.e. CUptiUtil_MergePcSampDataParamsSize
351
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
352
+ * available in the structure. Used to preserve backward compatibility.
353
+ */
354
+ size_t size;
355
+ /**
356
+ * Number of buffers to merge.
357
+ */
358
+ size_t numberOfBuffers;
359
+ /**
360
+ * Pointer to array of buffers to merge
361
+ */
362
+ CUpti_PCSamplingData *PcSampDataBuffer;
363
+ /**
364
+ * Pointer to array of merged buffers as per the range id.
365
+ */
366
+ CUpti_PCSamplingData **MergedPcSampDataBuffers;
367
+ /**
368
+ * Number of merged buffers.
369
+ */
370
+ size_t *numMergedBuffer;
371
+ } CUptiUtil_MergePcSampDataParams;
372
+ #define CUptiUtil_MergePcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_MergePcSampDataParams, numMergedBuffer)
373
+
374
+ /**
375
+ * \brief Merge PC sampling data range id wise.
376
+ *
377
+ * This API merges PC sampling data range id wise.
378
+ * It allocates memory for merged data, fills data in it
378
+ * and provides the buffer pointer in MergedPcSampDataBuffers field.
380
+ * It is expected from user to free the merged data buffers after use.
381
+ *
382
+ * \retval CUPTI_UTIL_SUCCESS
383
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if param struct size is invalid
384
+ * or count of buffers to merge is invalid i.e less than 1
385
+ * or either of PcSampDataBuffer, MergedPcSampDataBuffers, numMergedBuffer is NULL
386
+ * \retval CUPTI_UTIL_ERROR_OUT_OF_MEMORY Unable to allocate memory for merged buffer.
387
+ */
388
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilMergePcSampData(CUptiUtil_MergePcSampDataParams *pParams);
389
+
390
+ /** @} */ /* END CUPTI_PCSAMPLING_UTILITY */
391
+
392
+ } }
393
+
394
+ #if defined(__GNUC__)
395
+ #pragma GCC visibility pop
396
+ #endif
397
+
398
+ #if defined(__cplusplus)
399
+ }
400
+ #endif
401
+
402
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_pmsampling.h ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PMSAMPLING_H_)
51
+ #define _CUPTI_PMSAMPLING_H_
52
+
53
+ #include <cuda.h>
54
+ #include <cupti_result.h>
55
+ #include <stddef.h>
56
+ #include <stdint.h>
57
+
58
+ #ifdef __cplusplus
59
+ extern "C" {
60
+ #endif
61
+
62
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
63
+ #pragma GCC visibility push(default)
64
+ #endif
65
+
66
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
67
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
68
+ #endif
69
+
70
+ /* CUPTI PM sampling APIs */
71
+ /**
72
+ * \defgroup CUPTI_PM_SAMPLING_API CUPTI PM Sampling API
73
+ * Functions to enable, disable, start, stop, and decode PM sampling.
74
+ * @{
75
+ */
76
+ typedef struct CUpti_PmSampling_Object CUpti_PmSampling_Object;
77
+
78
+ typedef enum CUpti_PmSampling_TriggerMode
79
+ {
80
+ /// The trigger is based off of the SYSCLK frequency, note SYS frequency by default is variable.
81
+ /// the sample interval (set in the struct CUpti_PmSampling_SetConfig_Params) is in terms of clocks.
82
+ CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_SYSCLK_INTERVAL = 0,
83
+ /// The trigger is based off of a fixed frequency source.
84
+ /// The sample interval (set in the struct CUpti_PmSampling_SetConfig_Params) is in terms of nanoseconds.
85
+ /// Note: This trigger mode is not supported on Turing GPU architecture and GA100 GPU.
86
+ /// It is supported on Ampere GA10x and later GPU architectures.
87
+ CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_TIME_INTERVAL = 1,
88
+ CUPTI_PM_SAMPLING_TRIGGER_MODE_COUNT
89
+ } CUpti_PmSampling_TriggerMode;
90
+
91
+ typedef enum CUpti_PmSampling_DecodeStopReason
92
+ {
93
+ CUPTI_PM_SAMPLING_DECODE_STOP_REASON_OTHER = 0,
94
+ /// Counter data image is full.
95
+ CUPTI_PM_SAMPLING_DECODE_STOP_REASON_COUNTER_DATA_FULL,
96
+ /// All the records in the hardware buffer are decoded.
97
+ CUPTI_PM_SAMPLING_DECODE_STOP_REASON_END_OF_RECORDS,
98
+ CUPTI_PM_SAMPLING_DECODE_STOP_REASON_COUNT
99
+ } CUpti_PmSampling_DecodeStopReason;
100
+
101
+ typedef enum CUpti_PmSampling_HardwareBuffer_AppendMode
102
+ {
103
+ /// Keep the oldest records in the hardware buffer.
104
+ /// CUPTI will report error for overflow in case hardware buffer is getting filled up.
105
+ CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_OLDEST = 0,
106
+ /// Keep the latest records in the hardware buffer.
107
+ /// Note: This mode is not supported on Turing GPU architecture.
108
+ /// It is supported on Ampere and later GPU architectures.
109
+ CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_LATEST = 1
110
+ } CUpti_PmSampling_HardwareBuffer_AppendMode;
111
+
112
+ /**
113
+ * \brief Params for cuptiPmSamplingSetConfig
114
+ */
115
+ typedef struct CUpti_PmSampling_SetConfig_Params
116
+ {
117
+ /// [in] Size of the data structure.
118
+ size_t structSize;
119
+ /// [in] Set to NULL.
120
+ void* pPriv;
121
+ /// [in] PM sampling object.
122
+ CUpti_PmSampling_Object* pPmSamplingObject;
123
+ /// [in] Size of the config image.
124
+ size_t configSize;
125
+ /// [in] Config image.
126
+ const uint8_t* pConfig;
127
+ /// [in] The hardware buffer size in which raw PM sampling data
128
+ /// will be stored. These samples will be decoded to counter data
129
+ /// image with \ref cuptiPmSamplingDecodeData call.
130
+ size_t hardwareBufferSize;
131
+ /// [in] For the trigger mode `CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_SYSCLK_INTERVAL`, sampling interval
132
+ /// is the number of sys clock cycles. For the trigger mode `CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_TIME_INTERVAL`,
133
+ /// sampling interval is in nanoseconds.
134
+ uint64_t samplingInterval;
135
+ /// [in] Trigger mode.
136
+ /// Note: CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_TIME_INTERVAL is not supported in Turing and GA100.
137
+ /// Supported from GA10x onwards.
138
+ CUpti_PmSampling_TriggerMode triggerMode;
139
+ /// [in] Append mode for the records in hardware buffer.
140
+ /// For KEEP_OLDEST mode, all the records will be kept in the buffer and in case hardware buffer is getting filled up,
141
+ /// overflow will be set to 1 in \ref CUpti_PmSampling_DecodeData_Params. For KEEP_LATEST mode, the new records will
142
+ /// overwrite the oldest records in the buffer in case of filled buffer.
143
+ CUpti_PmSampling_HardwareBuffer_AppendMode hwBufferAppendMode;
144
+ } CUpti_PmSampling_SetConfig_Params;
145
+
146
+ #define CUpti_PmSampling_SetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_SetConfig_Params, hwBufferAppendMode)
147
+
148
+ /**
149
+ * \brief Set the configuration for PM sampling like sampling interval, maximum number of samples
150
+ * filled in HW buffer, trigger mode and the config image which has scheduling info for metric collection.
151
+ *
152
+ * \param pParams A pointer to \ref CUpti_PmSampling_SetConfig_Params
153
+ *
154
+ * \retval CUPTI_SUCCESS
155
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
156
+ * \retval CUPTI_ERROR_NOT_SUPPORTED for config image which require multiple passes for data collection
157
+ */
158
+ CUptiResult CUPTIAPI cuptiPmSamplingSetConfig(CUpti_PmSampling_SetConfig_Params* pParams);
159
+
160
+ /**
161
+ * \brief Params for cuptiPmSamplingEnable
162
+ */
163
+ typedef struct CUpti_PmSampling_Enable_Params
164
+ {
165
+ /// [in] Size of the data structure.
166
+ size_t structSize;
167
+ /// [in] Set to NULL.
168
+ void* pPriv;
169
+ /// [in] Device index.
170
+ size_t deviceIndex;
171
+ /// [out] PM sampling object.
172
+ CUpti_PmSampling_Object* pPmSamplingObject;
173
+ } CUpti_PmSampling_Enable_Params;
174
+
175
+ #define CUpti_PmSampling_Enable_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Enable_Params, pPmSamplingObject)
176
+
177
+ /**
178
+ * \brief Create a PM sampling object and enable PM sampling on the CUDA device.
179
+ *
180
+ * \param pParams A pointer to \ref CUpti_PmSampling_Enable_Params
181
+ *
182
+ * \retval CUPTI_SUCCESS
183
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
184
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY if memory allocation fails while creating the PM sampling object
185
+ * \retval CUPTI_ERROR_INVALID_OPERATION if PM sampling is already enabled on the device
186
+ * \retval CUPTI_ERROR_INSUFFICIENT_PRIVILEGES if the user does not have sufficient privileges to perform the operation
187
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
188
+ */
189
+ CUptiResult CUPTIAPI cuptiPmSamplingEnable(CUpti_PmSampling_Enable_Params* pParams);
190
+
191
+ /**
192
+ * \brief Params for cuptiPmSamplingDisable
193
+ */
194
+ typedef struct CUpti_PmSampling_Disable_Params
195
+ {
196
+ /// [in] Size of the data structure.
197
+ size_t structSize;
198
+ /// [in] Set to NULL.
199
+ void* pPriv;
200
+ /// [in] PM sampling object.
201
+ CUpti_PmSampling_Object* pPmSamplingObject;
202
+ } CUpti_PmSampling_Disable_Params;
203
+
204
+ #define CUpti_PmSampling_Disable_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Disable_Params, pPmSamplingObject)
205
+
206
+ /**
207
+ * \brief Disable PM sampling on the CUDA device and destroy the PM sampling object.
208
+ *
209
+ * \param pParams A pointer to \ref CUpti_PmSampling_Disable_Params
210
+ *
211
+ * \retval CUPTI_SUCCESS
212
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
213
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
214
+ */
215
+ CUptiResult CUPTIAPI cuptiPmSamplingDisable(CUpti_PmSampling_Disable_Params* pParams);
216
+
217
+ /**
218
+ * \brief Params for cuptiPmSamplingStart
219
+ */
220
+ typedef struct CUpti_PmSampling_Start_Params
221
+ {
222
+ /// [in] Size of the data structure.
223
+ size_t structSize;
224
+ /// [in] Set to NULL.
225
+ void* pPriv;
226
+ /// [in] PM sampling object.
227
+ CUpti_PmSampling_Object* pPmSamplingObject;
228
+ } CUpti_PmSampling_Start_Params;
229
+
230
+ #define CUpti_PmSampling_Start_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Start_Params, pPmSamplingObject)
231
+
232
+ /**
233
+ * \brief Start the PM sampling. The GPU will start collecting the metrics data
234
+ * periodically based on trigger type and sampling interval passed in CUpti_PmSampling_SetConfig_Params.
235
+ * The collected data will be stored in the hardware buffer.
236
+ *
237
+ * \param pParams A pointer to \ref CUpti_PmSampling_Start_Params
238
+ *
239
+ * \retval CUPTI_SUCCESS
240
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
241
+ * \retval CUPTI_ERROR_INVALID_OPERATION if PM sampling Start is called without enabling PM sampling,
242
+ * or PM sampling is already started
243
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
244
+ */
245
+ CUptiResult CUPTIAPI cuptiPmSamplingStart(CUpti_PmSampling_Start_Params* pParams);
246
+
247
+ /**
248
+ * \brief Params for cuptiPmSamplingStop
249
+ */
250
+ typedef struct CUpti_PmSampling_Stop_Params
251
+ {
252
+ /// [in] Size of the data structure.
253
+ size_t structSize;
254
+ /// [in] Set to NULL.
255
+ void* pPriv;
256
+ /// [in] PM sampling object.
257
+ CUpti_PmSampling_Object* pPmSamplingObject;
258
+ } CUpti_PmSampling_Stop_Params;
259
+
260
+ #define CUpti_PmSampling_Stop_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Stop_Params, pPmSamplingObject)
261
+
262
+ /**
263
+ * \brief Stop the PM sampling. The GPU will stop collecting the metrics data.
264
+ *
265
+ * \param pParams A pointer to \ref CUpti_PmSampling_Stop_Params
266
+ *
267
+ * \retval CUPTI_SUCCESS
268
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
269
+ * \retval CUPTI_ERROR_INVALID_OPERATION if PM sampling Stop is called without enabling PM sampling,
270
+ * or PM sampling is already stopped
271
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
272
+ */
273
+ CUptiResult CUPTIAPI cuptiPmSamplingStop(CUpti_PmSampling_Stop_Params* pParams);
274
+
275
+ /**
276
+ * \brief Params for cuptiPmSamplingDecodeData
277
+ */
278
+ typedef struct CUpti_PmSampling_DecodeData_Params
279
+ {
280
+ /// [in] Size of the data structure.
281
+ size_t structSize;
282
+ /// [in] Set to NULL.
283
+ void* pPriv;
284
+ /// [in] PM sampling object.
285
+ CUpti_PmSampling_Object* pPmSamplingObject;
286
+ /// [in] Counter data image.
287
+ uint8_t* pCounterDataImage;
288
+ /// [in] Size of the counter data image.
289
+ size_t counterDataImageSize;
290
+ /// [out] decode stop reason
291
+ CUpti_PmSampling_DecodeStopReason decodeStopReason;
292
+ /// [out] overflow status for hardware buffer.
293
+ /// To avoid overflow, either increase the maxSamples values in
294
+ /// \ref CUpti_PmSampling_SetConfig_Params or reduce the sampling interval.
295
+ uint8_t overflow;
296
+ } CUpti_PmSampling_DecodeData_Params;
297
+
298
+ #define CUpti_PmSampling_DecodeData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_DecodeData_Params, overflow)
299
+
300
+ /**
301
+ * \brief Decode the metrics data stored in the hardware buffer to the counter data image.
302
+ *
303
+ *
304
+ * \param pParams A pointer to \ref CUpti_PmSampling_DecodeData_Params
305
+ *
306
+ * \retval CUPTI_SUCCESS
307
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
308
+ * \retval CUPTI_ERROR_INVALID_OPERATION if PM sampling DecodeData is called without enabling PM sampling
309
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY if there is record overflow in the hardware buffer
310
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
311
+ */
312
+ CUptiResult CUPTIAPI cuptiPmSamplingDecodeData(CUpti_PmSampling_DecodeData_Params* pParams);
313
+
314
+ /**
315
+ * \brief Params for cuptiPmSamplingGetCounterAvailability
316
+ */
317
+ typedef struct CUpti_PmSampling_GetCounterAvailability_Params
318
+ {
319
+ /// [in] Size of the data structure.
320
+ size_t structSize;
321
+ /// [in] Set to NULL.
322
+ void* pPriv;
323
+ /// [in] Device index.
324
+ size_t deviceIndex;
325
+ /// [inout] Size of the counter availability image. When pCounterAvailabilityImage is NULL,
326
+ /// this field is used to return the size of the counter availability image.
327
+ size_t counterAvailabilityImageSize;
328
+ /// [out] Counter availability image.
329
+ uint8_t* pCounterAvailabilityImage;
330
+ } CUpti_PmSampling_GetCounterAvailability_Params;
331
+ #define CUpti_PmSampling_GetCounterAvailability_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage)
332
+
333
+ /**
334
+ * \brief Query counter availability information in a buffer which can be used to filter unavailable raw metrics on host.
335
+ * Note: This API may fail, if any profiling or sampling session is active on the specified device.
336
+ *
337
+ * \param pParams A pointer to \ref CUpti_PmSampling_GetCounterAvailability_Params
338
+ *
339
+ * \retval CUPTI_SUCCESS
340
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
341
+ * \retval CUPTI_ERROR_INSUFFICIENT_PRIVILEGES if the user does not have sufficient privileges to perform the operation
342
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
343
+ */
344
+ CUptiResult CUPTIAPI cuptiPmSamplingGetCounterAvailability(CUpti_PmSampling_GetCounterAvailability_Params* pParams);
345
+
346
+ /**
347
+ * \brief Params for cuptiPmSamplingGetCounterDataSize
348
+ */
349
+ typedef struct CUpti_PmSampling_GetCounterDataSize_Params
350
+ {
351
+ /// [in] Size of the data structure.
352
+ size_t structSize;
353
+ /// [in] Set to NULL.
354
+ void* pPriv;
355
+ /// [in] PM sampling object.
356
+ CUpti_PmSampling_Object* pPmSamplingObject;
357
+ /// [in] Names of the metrics to be collected.
358
+ const char** pMetricNames;
359
+ /// [in] Number of metrics to be collected.
360
+ size_t numMetrics;
361
+ /// [in] Maximum number of samples to be stored in the counter data image.
362
+ uint32_t maxSamples;
363
+ /// [out] Size of the counter data image.
364
+ size_t counterDataSize;
365
+ } CUpti_PmSampling_GetCounterDataSize_Params;
366
+ #define CUpti_PmSampling_GetCounterDataSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataSize_Params, counterDataSize)
367
+
368
+ /**
369
+ * \brief Query the size of the counter data image which will be used to store the metrics data.
370
+ * The user needs to allocate the memory for the counter data image based on the size returned by this API.
371
+ *
372
+ * \param pParams A pointer to \ref CUpti_PmSampling_GetCounterDataSize_Params
373
+ *
374
+ * \retval CUPTI_SUCCESS
375
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
376
+ * \retval CUPTI_ERROR_INVALID_OPERATION if PM sampling GetCounterDataSize is called without enabling PM sampling
377
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
378
+ */
379
+ CUptiResult CUPTIAPI cuptiPmSamplingGetCounterDataSize(CUpti_PmSampling_GetCounterDataSize_Params* pParams);
380
+
381
+ /**
382
+ * \brief Params for cuptiPmSamplingCounterDataImageInitialize
383
+ */
384
+ typedef struct CUpti_PmSampling_CounterDataImage_Initialize_Params
385
+ {
386
+ /// [in] Size of the data structure.
387
+ size_t structSize;
388
+ /// [in] Set to NULL.
389
+ void* pPriv;
390
+ /// [in] PM sampling object.
391
+ CUpti_PmSampling_Object* pPmSamplingObject;
392
+ /// [in] Size of the counter data image.
393
+ size_t counterDataSize;
394
+ /// [in] Counter data image.
395
+ uint8_t* pCounterData;
396
+ } CUpti_PmSampling_CounterDataImage_Initialize_Params;
397
+ #define CUpti_PmSampling_CounterDataImage_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterDataImage_Initialize_Params, pCounterData)
398
+
399
+ /**
400
+ * \brief Initialize the counter data to CUPTI record format for storing the metric data.
401
+ *
402
+ * \param pParams A pointer to \ref CUpti_PmSampling_CounterDataImage_Initialize_Params
403
+ *
404
+ * \retval CUPTI_SUCCESS
405
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
406
+ * \retval CUPTI_ERROR_INVALID_OPERATION if PM sampling CounterDataInitialize is called without enabling PM sampling
407
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
408
+ */
409
+ CUptiResult CUPTIAPI cuptiPmSamplingCounterDataImageInitialize(CUpti_PmSampling_CounterDataImage_Initialize_Params* pParams);
410
+
411
+ /**
412
+ * \brief Params for cuptiPmSamplingGetCounterDataInfo
413
+ */
414
+ typedef struct CUpti_PmSampling_GetCounterDataInfo_Params
415
+ {
416
+ /// [in] Size of the data structure.
417
+ size_t structSize;
418
+ /// [in] Set to NULL.
419
+ void* pPriv;
420
+ /// [in] Counter data image.
421
+ const uint8_t* pCounterDataImage;
422
+ /// [in] Size of the counter data image.
423
+ size_t counterDataImageSize;
424
+ /// [out] Number of samples in the counter data image.
425
+ size_t numTotalSamples;
426
+ /// [out] Number of populated samples.
427
+ size_t numPopulatedSamples;
428
+ /// [out] Number of samples that have been completed.
429
+ size_t numCompletedSamples;
430
+ } CUpti_PmSampling_GetCounterDataInfo_Params;
431
+ #define CUpti_PmSampling_GetCounterDataInfo_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataInfo_Params, numCompletedSamples)
432
+
433
+ /**
434
+ * \brief Get the counter data info like number of samples, number of populated
435
+ * samples and number of completed samples in a counter data image.
436
+ *
437
+ * \param pParams A pointer to \ref CUpti_PmSampling_GetCounterDataInfo_Params
438
+ *
439
+ * \retval CUPTI_SUCCESS
440
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
441
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
442
+ */
443
+ CUptiResult CUPTIAPI cuptiPmSamplingGetCounterDataInfo(CUpti_PmSampling_GetCounterDataInfo_Params* pParams);
444
+
445
+ /**
446
+ * \brief Params for cuptiPmSamplingCounterDataGetSampleInfo
447
+ */
448
+ typedef struct CUpti_PmSampling_CounterData_GetSampleInfo_Params
449
+ {
450
+ /// [in] Size of the data structure.
451
+ size_t structSize;
452
+ /// [in] Set to NULL.
453
+ void* pPriv;
454
+ /// [in] PM sampling object.
455
+ CUpti_PmSampling_Object* pPmSamplingObject;
456
+ /// [in] Counter data image.
457
+ const uint8_t* pCounterDataImage;
458
+ /// [in] Size of the counter data image.
459
+ size_t counterDataImageSize;
460
+ /// [in] Index of the sample.
461
+ size_t sampleIndex;
462
+ /// [out] Start time of the sample.
463
+ uint64_t startTimestamp;
464
+ /// [out] End time of the sample.
465
+ uint64_t endTimestamp;
466
+ } CUpti_PmSampling_CounterData_GetSampleInfo_Params;
467
+ #define CUpti_PmSampling_CounterData_GetSampleInfo_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterData_GetSampleInfo_Params, endTimestamp)
468
+
469
+ /**
470
+ * \brief Get the sample info (start and end time stamp) for the given sample index.
471
+ * Each sample is distinguished by the start and end time stamp.
472
+ *
473
+ * \param pParams A pointer to \ref CUpti_PmSampling_CounterData_GetSampleInfo_Params
474
+ *
475
+ * \retval CUPTI_SUCCESS
476
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
477
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
478
+ */
479
+ CUptiResult CUPTIAPI cuptiPmSamplingCounterDataGetSampleInfo(CUpti_PmSampling_CounterData_GetSampleInfo_Params* pParams);
480
+
481
+ /** @} */ /* END CUPTI_PMSAMPLING_API */
482
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
483
+ #pragma GCC visibility pop
484
+ #endif
485
+
486
+ #ifdef __cplusplus
487
+ } /* extern "C" */
488
+ #endif
489
+
490
+ #endif // _CUPTI_PMSAMPLING_H_
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_profiler_host.h ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PROFILER_HOST_H_)
51
+ #define _CUPTI_PROFILER_HOST_H_
52
+
53
+ /*
54
+ CUPTI profiler host APIs
55
+ This file contains the CUPTI profiling host APIs.
56
+ */
57
+ #include <cupti_result.h>
58
+ #include <stdint.h>
59
+ #include <stddef.h>
60
+ #include <string>
61
+
62
+ #ifdef __cplusplus
63
+ extern "C" {
64
+ #endif
65
+
66
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
67
+ #pragma GCC visibility push(default)
68
+ #endif
69
+
70
+ /**
71
+ * \defgroup CUPTI_PROFILER_HOST_API CUPTI Profiler Host API
72
+ * Functions, types, and enums that implement the CUPTI Profiler Host API.
73
+ * @{
74
+ */
75
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
76
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
77
+ #endif
78
+
79
+ typedef enum CUpti_MetricType
80
+ {
81
+ CUPTI_METRIC_TYPE_COUNTER = 0,
82
+ CUPTI_METRIC_TYPE_RATIO,
83
+ CUPTI_METRIC_TYPE_THROUGHPUT,
84
+ CUPTI_METRIC_TYPE__COUNT
85
+ } CUpti_MetricType;
86
+
87
+ typedef enum CUpti_ProfilerType
88
+ {
89
+ CUPTI_PROFILER_TYPE_RANGE_PROFILER,
90
+ CUPTI_PROFILER_TYPE_PM_SAMPLING,
91
+ CUPTI_PROFILER_TYPE_PROFILER_INVALID
92
+ } CUpti_ProfilerType;
93
+
94
+ typedef struct CUpti_Profiler_Host_Object CUpti_Profiler_Host_Object;
95
+
96
+ /**
97
+ * \brief Params for cuptiProfilerHostInitialize
98
+ */
99
+ typedef struct CUpti_Profiler_Host_Initialize_Params
100
+ {
101
+ /// [in] Size of the data structure.
102
+ /// CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
103
+ /// available in the structure. Used to preserve backward compatibility.
104
+ size_t structSize;
105
+ /// [in] Assign to NULL
106
+ void* pPriv;
107
+ /// [in] the profiler kind, one of the CUpti_ProfilerType values
108
+ CUpti_ProfilerType profilerType;
109
+ /// [in] accepted for chips supported at the time-of-release.
110
+ const char* pChipName;
111
+ /// [in] buffer with counter availability image - required for future chip support
112
+ const uint8_t* pCounterAvailabilityImage;
113
+ /// [out] binary blob allocated by CUPTI and operations associated with this object.
114
+ CUpti_Profiler_Host_Object* pHostObject;
115
+ } CUpti_Profiler_Host_Initialize_Params;
116
+
117
+ #define CUpti_Profiler_Host_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_Initialize_Params, pHostObject)
118
+
119
+ /**
120
+ * \brief Create and initialize the profiler host object (CUpti_Profiler_Host_Object).
121
+ *
122
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_Initialize_Params
123
+ *
124
+ * \retval CUPTI_SUCCESS
125
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
126
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
127
+ */
128
+ CUptiResult CUPTIAPI cuptiProfilerHostInitialize(CUpti_Profiler_Host_Initialize_Params* pParams);
129
+
130
+ /**
131
+ * \brief Params for cuptiProfilerHostDeinitialize
132
+ */
133
+ typedef struct CUpti_Profiler_Host_Deinitialize_Params
134
+ {
135
+ /// [in] Size of the data structure.
136
+ /// CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
137
+ /// available in the structure. Used to preserve backward compatibility.
138
+ size_t structSize;
139
+ /// [in] Assign to NULL
140
+ void* pPriv;
141
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
142
+ struct CUpti_Profiler_Host_Object* pHostObject;
143
+ } CUpti_Profiler_Host_Deinitialize_Params;
144
+
145
+ #define CUpti_Profiler_Host_Deinitialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_Deinitialize_Params, pHostObject)
146
+
147
+ /**
148
+ * \brief Deinitialize and destroy the profiler host object (CUpti_Profiler_Host_Object).
149
+ *
150
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_Deinitialize_Params
151
+ *
152
+ * \retval CUPTI_SUCCESS
153
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
154
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
155
+ */
156
+ CUptiResult CUPTIAPI cuptiProfilerHostDeinitialize(CUpti_Profiler_Host_Deinitialize_Params* pParams);
157
+
158
+ /**
159
+ * \brief Params for cuptiProfilerHostGetSupportedChips
160
+ */
161
+ typedef struct CUpti_Profiler_Host_GetSupportedChips_Params
162
+ {
163
+ /// [in] Size of the data structure.
164
+ size_t structSize;
165
+ /// [in] Assign to NULL
166
+ void* pPriv;
167
+ /// [out] number of supported chips
168
+ size_t numChips;
169
+ /// [out] list of supported chips
170
+ const char* const* ppChipNames;
171
+ } CUpti_Profiler_Host_GetSupportedChips_Params;
172
+
173
+ #define CUpti_Profiler_Host_GetSupportedChips_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSupportedChips_Params, ppChipNames)
174
+
175
+ /**
176
+ * \brief Get the list of supported chips.
177
+ *
178
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetSupportedChips_Params
179
+ *
180
+ * \retval CUPTI_SUCCESS
181
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
182
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
183
+ */
184
+ CUptiResult CUPTIAPI cuptiProfilerHostGetSupportedChips(CUpti_Profiler_Host_GetSupportedChips_Params* pParams);
185
+
186
+ /**
187
+ * \brief Params for cuptiProfilerHostGetBaseMetrics
188
+ */
189
+ typedef struct CUpti_Profiler_Host_GetBaseMetrics_Params
190
+ {
191
+ /// [in] Size of the data structure.
192
+ size_t structSize;
193
+ /// [in] Assign to NULL
194
+ void* pPriv;
195
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
196
+ struct CUpti_Profiler_Host_Object* pHostObject;
197
+ /// [in] metric type (counter, ratio, throughput)
198
+ CUpti_MetricType metricType;
199
+ /// [out] list of base metrics supported of queried metric type for the chip
200
+ const char** ppMetricNames;
201
+ /// [out] number of metrics
202
+ size_t numMetrics;
203
+ } CUpti_Profiler_Host_GetBaseMetrics_Params;
204
+
205
+ #define CUpti_Profiler_Host_GetBaseMetrics_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetBaseMetrics_Params, numMetrics)
206
+
207
+ /**
208
+ * \brief Get the list of supported base metrics for the chip.
209
+ *
210
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetBaseMetrics_Params
211
+ *
212
+ * \retval CUPTI_SUCCESS
213
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
214
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
215
+ */
216
+ CUptiResult CUPTIAPI cuptiProfilerHostGetBaseMetrics(CUpti_Profiler_Host_GetBaseMetrics_Params* pParams);
217
+
218
+ /**
219
+ * \brief Params for cuptiProfilerHostGetSubMetrics
220
+ */
221
+ typedef struct CUpti_Profiler_Host_GetSubMetrics_Params
222
+ {
223
+ /// [in] Size of the data structure.
224
+ size_t structSize;
225
+ /// [in] Assign to NULL
226
+ void* pPriv;
227
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
228
+ CUpti_Profiler_Host_Object* pHostObject;
229
+ /// [in] the metric type for queried metric
230
+ CUpti_MetricType metricType;
231
+ /// [in] metric name for which sub-metric will be listed
232
+ const char* pMetricName;
233
+ /// [out] number of submetrics supported
234
+ size_t numOfSubmetrics;
235
+ /// [out] list of submetrics supported for the metric.
236
+ const char** ppSubMetrics;
237
+ } CUpti_Profiler_Host_GetSubMetrics_Params;
238
+
239
+ #define CUpti_Profiler_Host_GetSubMetrics_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSubMetrics_Params, ppSubMetrics)
240
+
241
+ /**
242
+ * \brief Get the list of supported sub-metrics for the metric.
243
+ *
244
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetSubMetrics_Params
245
+ *
246
+ * \retval CUPTI_SUCCESS
247
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
248
+ * \retval CUPTI_ERROR_INVALID_METRIC_NAME if the metric name is not valid or not supported for the chip
249
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
250
+ */
251
+ CUptiResult CUPTIAPI cuptiProfilerHostGetSubMetrics(CUpti_Profiler_Host_GetSubMetrics_Params* pParams);
252
+
253
+ /**
254
+ * \brief Params for cuptiProfilerHostGetMetricProperties
255
+ */
256
+ typedef struct CUpti_Profiler_Host_GetMetricProperties_Params
257
+ {
258
+ /// [in] Size of the data structure.
259
+ size_t structSize;
260
+ /// [in] Assign to NULL
261
+ void* pPriv;
262
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
263
+ CUpti_Profiler_Host_Object* pHostObject;
264
+ /// [in] metric name for which its properties will be listed
265
+ const char* pMetricName;
266
+ /// [out] a short description about the metric
267
+ const char* pDescription;
268
+ /// [out] associated hw unit for the metric
269
+ const char* pHwUnit;
270
+ /// [out] the dimension of the metric values
271
+ const char* pDimUnit;
272
+ /// [out] the metric type (counter, ratio or throughput)
273
+ CUpti_MetricType metricType;
274
+ } CUpti_Profiler_Host_GetMetricProperties_Params;
275
+
276
+ #define CUpti_Profiler_Host_GetMetricProperties_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetMetricProperties_Params, metricType)
277
+
278
+ /**
279
+ * \brief Get the properties of the metric.
280
+ *
281
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetMetricProperties_Params
282
+ *
283
+ * \retval CUPTI_SUCCESS
284
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
285
+ * \retval CUPTI_ERROR_INVALID_METRIC_NAME if the metric name is not valid or not supported for the chip
286
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
287
+ */
288
+ CUptiResult CUPTIAPI cuptiProfilerHostGetMetricProperties(CUpti_Profiler_Host_GetMetricProperties_Params* pParams);
289
+
290
+ /**
291
+ * \brief Params for cuptiProfilerHostGetRangeName
292
+ */
293
+ typedef struct CUpti_Profiler_Host_GetRangeName_Params
294
+ {
295
+ /// [in] Size of the data structure.
296
+ size_t structSize;
297
+ /// [in] Assign to NULL
298
+ void* pPriv;
299
+ /// [in] the counter data image where profiling data has been decoded
300
+ const uint8_t* pCounterDataImage;
301
+ /// [in] size of counter data image
302
+ size_t counterDataImageSize;
303
+ /// [in] range index for which the range name will be queried
304
+ size_t rangeIndex;
305
+ /// [in] used in case of nested ranges, default="/". Range1<delimiter>Range2
306
+ const char* delimiter;
307
+ /// [out] the range name.
308
+ /// Note: CUPTI allocates the memory internally and
309
+ /// it is the user's responsibility to free the allocated memory.
310
+ const char* pRangeName;
311
+ } CUpti_Profiler_Host_GetRangeName_Params;
312
+
313
+ #define CUpti_Profiler_Host_GetRangeName_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetRangeName_Params, pRangeName)
314
+
315
+ /**
316
+ * \brief Get the range name for the range index stored in the counter data.
317
+ * In Range profiler, for Auto range mode the range name will be numeric value
318
+ * assigned to the kernel based on execution order. For user range mode, the
319
+ * name of range will be based on the range name provided by the user using
320
+ * Push range API.
321
+ *
322
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetRangeName_Params
323
+ *
324
+ * \retval CUPTI_SUCCESS
325
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
326
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
327
+ */
328
+ CUptiResult CUPTIAPI cuptiProfilerHostGetRangeName(CUpti_Profiler_Host_GetRangeName_Params* pParams);
329
+
330
+ /**
331
+ * \brief Params for cuptiProfilerHostEvaluateToGpuValues
332
+ */
333
+ typedef struct CUpti_Profiler_Host_EvaluateToGpuValues_Params
334
+ {
335
+ /// [in] Size of the data structure.
336
+ size_t structSize;
337
+ /// [in] Assign to NULL
338
+ void* pPriv;
339
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
340
+ CUpti_Profiler_Host_Object* pHostObject;
341
+ /// [in] the counter data image where profiling data has been decoded
342
+ const uint8_t* pCounterDataImage;
343
+ /// [in] size of counter data image
344
+ size_t counterDataImageSize;
345
+ /// [in] range index for which the metric values will be evaluated
346
+ size_t rangeIndex;
347
+ /// [in] the metrics for which GPU values will be evaluated for the range
348
+ const char** ppMetricNames;
349
+ /// [in] number of metrics
350
+ size_t numMetrics;
351
+ /// [out] output value for given metric and range index
352
+ double* pMetricValues;
353
+ } CUpti_Profiler_Host_EvaluateToGpuValues_Params;
354
+
355
+ #define CUpti_Profiler_Host_EvaluateToGpuValues_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_EvaluateToGpuValues_Params, pMetricValues)
356
+
357
+ /**
358
+ * \brief Evaluate the metric values for the range index stored in the counter data.
359
+ *
360
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_EvaluateToGpuValues_Params
361
+ *
362
+ * \retval CUPTI_SUCCESS
363
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
364
+ * \retval CUPTI_ERROR_INVALID_METRIC_NAME if the metric name is not valid or not supported for the chip
365
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
366
+ */
367
+ CUptiResult CUPTIAPI cuptiProfilerHostEvaluateToGpuValues(CUpti_Profiler_Host_EvaluateToGpuValues_Params* pParams);
368
+
369
+ /**
370
+ * \brief Params for cuptiProfilerHostConfigAddMetrics
371
+ */
372
+ typedef struct CUpti_Profiler_Host_ConfigAddMetrics_Params
373
+ {
374
+ /// [in] Size of the data structure.
375
+ size_t structSize;
376
+ /// [in] Assign to NULL
377
+ void* pPriv;
378
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
379
+ struct CUpti_Profiler_Host_Object* pHostObject;
380
+ /// [in] metric names for which config image will be generated
381
+ const char** ppMetricNames;
382
+ /// [in] number of metrics
383
+ size_t numMetrics;
384
+ } CUpti_Profiler_Host_ConfigAddMetrics_Params;
385
+
386
+ #define CUpti_Profiler_Host_ConfigAddMetrics_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_ConfigAddMetrics_Params, numMetrics)
387
+
388
+ /**
389
+ * \brief Add the metrics to the profiler host object for generating the config image.
390
+ * The config image will have the required information to schedule the metrics for
391
+ * collecting the profiling data.
392
+ * Note: PM sampling only supports single pass config image.
393
+ *
394
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_ConfigAddMetrics_Params
395
+ *
396
+ * \retval CUPTI_SUCCESS
397
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
398
+ * \retval CUPTI_ERROR_INVALID_METRIC_NAME if the metric name is not valid or not supported for the chip
399
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
400
+ */
401
+ CUptiResult CUPTIAPI cuptiProfilerHostConfigAddMetrics(CUpti_Profiler_Host_ConfigAddMetrics_Params* pParams);
402
+
403
+ /**
404
+ * \brief Params for cuptiProfilerHostGetConfigImageSize
405
+ */
406
+ typedef struct CUpti_Profiler_Host_GetConfigImageSize_Params
407
+ {
408
+ /// [in] Size of the data structure.
409
+ size_t structSize;
410
+ /// [in] Assign to NULL
411
+ void* pPriv;
412
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
413
+ CUpti_Profiler_Host_Object* pHostObject;
414
+ /// [out] the size of the config image; users need to allocate a buffer of this size for storing the config image
415
+ size_t configImageSize;
416
+ } CUpti_Profiler_Host_GetConfigImageSize_Params;
417
+
418
+ #define CUpti_Profiler_Host_GetConfigImageSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetConfigImageSize_Params, configImageSize)
419
+
420
+ /**
421
+ * \brief Get the size of the config image for the metrics added to the profiler host object.
422
+ * Users need to allocate the buffer for storing the config image.
423
+ *
424
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetConfigImageSize_Params
425
+ *
426
+ * \retval CUPTI_SUCCESS
427
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
428
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
429
+ */
430
+ CUptiResult CUPTIAPI cuptiProfilerHostGetConfigImageSize(CUpti_Profiler_Host_GetConfigImageSize_Params* pParams);
431
+
432
+ /**
433
+ * \brief Params for cuptiProfilerHostGetConfigImage
434
+ */
435
+ typedef struct CUpti_Profiler_Host_GetConfigImage_Params
436
+ {
437
+ /// [in] Size of the data structure.
438
+ size_t structSize;
439
+ /// [in] Assign to NULL
440
+ void* pPriv;
441
+ /// [in] reference to the profiler host object allocated by CUPTI in cuptiProfilerHostInitialize
442
+ CUpti_Profiler_Host_Object* pHostObject;
443
+ /// [in] Number of bytes allocated for pConfigImage
444
+ size_t configImageSize;
445
+ /// [out] Buffer receiving the config image
446
+ uint8_t* pConfigImage;
447
+ } CUpti_Profiler_Host_GetConfigImage_Params;
448
+
449
+ #define CUpti_Profiler_Host_GetConfigImage_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetConfigImage_Params, pConfigImage)
450
+
451
+ /**
452
+ * \brief Get the config image for the metrics added to the profiler host object.
453
+ * User will pass the allocated buffer to store the config image.
454
+ *
455
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetConfigImage_Params
456
+ *
457
+ * \retval CUPTI_SUCCESS
458
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
459
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
460
+ */
461
+ CUptiResult CUPTIAPI cuptiProfilerHostGetConfigImage(CUpti_Profiler_Host_GetConfigImage_Params* pParams);
462
+
463
+ /**
464
+ * \brief Params for cuptiProfilerHostGetNumOfPasses
465
+ */
466
+ typedef struct CUpti_Profiler_Host_GetNumOfPasses_Params
467
+ {
468
+ /// [in] Size of the data structure.
469
+ size_t structSize;
470
+ /// [in] Assign to NULL
471
+ void* pPriv;
472
+ /// [in] Number of bytes allocated for pConfigImage
473
+ size_t configImageSize;
474
+ /// [in] the config image buffer
475
+ uint8_t* pConfigImage;
476
+ /// [out] number of passes required for profiling scheduled metrics in the config image
477
+ size_t numOfPasses;
478
+ } CUpti_Profiler_Host_GetNumOfPasses_Params;
479
+
480
+ #define CUpti_Profiler_Host_GetNumOfPasses_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetNumOfPasses_Params, numOfPasses)
481
+
482
+ /**
483
+ * \brief Get the number of passes required for profiling the scheduled metrics in the config image.
484
+ *
485
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetNumOfPasses_Params
486
+ *
487
+ * \retval CUPTI_SUCCESS
488
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
489
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
490
+ */
491
+ CUptiResult CUPTIAPI cuptiProfilerHostGetNumOfPasses(CUpti_Profiler_Host_GetNumOfPasses_Params* pParams);
492
+
493
+ /**
494
+ * \brief Params for cuptiProfilerHostGetMaxNumHardwareMetricsPerPass
495
+ */
496
+ typedef struct CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params
497
+ {
498
+ /// [in] Size of the data structure.
499
+ size_t structSize;
500
+ /// [in] Assign to NULL
501
+ void* pPriv;
502
+ /// [in] the profiler kind, one of the CUpti_ProfilerType values
503
+ CUpti_ProfilerType profilerType;
504
+ /// [in] accepted for chips supported at the time-of-release.
505
+ const char* pChipName;
506
+ /// [in] buffer with counter availability image - required for future chip support
507
+ uint8_t* pCounterAvailabilityImage;
508
+ /// [out] maximum number of metrics that can be scheduled in a pass
509
+ size_t maxMetricsPerPass;
510
+ } CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params;
511
+
512
+ #define CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params, maxMetricsPerPass)
513
+
514
+ /**
515
+ * \brief Get the maximum number of hardware metrics (metric names which don't include the _sass_ keyword)
516
+ * that can be scheduled in a single pass for a chip. While this represents a theoretical upper limit,
517
+ * practical constraints may prevent reaching this threshold for a specific set of metrics. Furthermore,
518
+ * the maximum achievable value is contingent upon the characteristics and architecture of the chip in question.
519
+ *
520
+ * Use cuptiProfilerHostGetNumOfPasses API for getting the actual number of passes required
521
+ * for collecting the profiling data for the scheduled metrics in a config image.
522
+ *
523
+ * \param pParams A pointer to \ref CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params
524
+ *
525
+ * \retval CUPTI_SUCCESS
526
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
527
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
528
+ */
529
+ CUptiResult CUPTIAPI cuptiProfilerHostGetMaxNumHardwareMetricsPerPass(CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params* pParams);
530
+
531
+ /** @} */ /* END CUPTI_PROFILER_HOST_API */
532
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
533
+ #pragma GCC visibility pop
534
+ #endif
535
+
536
+
537
+ #ifdef __cplusplus
538
+ } /* extern "C" */
539
+ #endif
540
+
541
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_profiler_target.h ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2011-2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PROFILER_TARGET_H_)
51
+ #define _CUPTI_PROFILER_TARGET_H_
52
+
53
+ #include <cuda.h>
54
+ #include <cupti_result.h>
55
+ #include <stddef.h>
56
+ #include <stdint.h>
57
+
58
+ #ifdef __cplusplus
59
+ extern "C" {
60
+ #endif
61
+
62
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
63
+ #pragma GCC visibility push(default)
64
+ #endif
65
+
66
+ /**
67
+ * \defgroup CUPTI_PROFILER_API CUPTI Profiling API
68
+ * Functions, types, and enums that implement the CUPTI Profiling API.
69
+ * @{
70
+ */
71
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
72
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
73
+ #endif
74
+
75
+ /**
76
+ * \brief Profiler range attribute
77
+ *
78
+ * A metric enabled in the session's configuration is collected separately per unique range-stack in the pass.
79
+ * This is an attribute to collect metrics around each kernel in a profiling session or in a user-defined range.
80
+ */
81
+ typedef enum
82
+ {
83
+ /**
84
+ * Invalid value
85
+ */
86
+ CUPTI_Range_INVALID,
87
+ /**
88
+ * Ranges are auto defined around each kernel in a profiling session
89
+ */
90
+ CUPTI_AutoRange,
91
+ /**
92
+ * A range in which metric data to be collected is defined by the user
93
+ */
94
+ CUPTI_UserRange,
95
+ /**
96
+ * Range count
97
+ */
98
+ CUPTI_Range_COUNT,
99
+ } CUpti_ProfilerRange;
100
+
101
+ /**
102
+ * \brief Profiler replay attribute
103
+ *
104
+ * For metrics which require multipass collection, a replay of the GPU kernel(s) is required.
105
+ * This is an attribute which specifies how the replay of the kernel(s) to be measured is done.
106
+ */
107
+ typedef enum
108
+ {
109
+ /**
110
+ * Invalid Value
111
+ */
112
+ CUPTI_Replay_INVALID,
113
+ /**
114
+ * Replay is done by CUPTI user around the process
115
+ */
116
+ CUPTI_ApplicationReplay,
117
+ /**
118
+ * Replay is done around kernel implicitly by CUPTI
119
+ */
120
+ CUPTI_KernelReplay,
121
+ /**
122
+ * Replay is done by CUPTI user within a process
123
+ */
124
+ CUPTI_UserReplay,
125
+ /**
126
+ * Replay count
127
+ */
128
+ CUPTI_Replay_COUNT,
129
+ } CUpti_ProfilerReplayMode;
130
+
131
+ /**
132
+ * \brief Default parameter for cuptiProfilerInitialize
133
+ */
134
+ typedef struct CUpti_Profiler_Initialize_Params
135
+ {
136
+ size_t structSize; //!< [in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE
137
+ void* pPriv; //!< [in] assign to NULL
138
+
139
+ } CUpti_Profiler_Initialize_Params;
140
+ #define CUpti_Profiler_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Initialize_Params, pPriv)
141
+
142
+ /**
143
+ * \brief Default parameter for cuptiProfilerDeInitialize
144
+ */
145
+ typedef struct CUpti_Profiler_DeInitialize_Params
146
+ {
147
+ size_t structSize; //!< [in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE
148
+ void* pPriv; //!< [in] assign to NULL
149
+
150
+ } CUpti_Profiler_DeInitialize_Params;
151
+ #define CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeInitialize_Params, pPriv)
152
+
153
+ /**
154
+ * \brief Initializes the profiler interface
155
+ *
156
+ * Loads the required libraries in the process address space.
157
+ * Sets up the hooks with the CUDA driver.
158
+ */
159
+ CUptiResult CUPTIAPI cuptiProfilerInitialize(CUpti_Profiler_Initialize_Params *pParams);
160
+
161
+ /**
162
+ * \brief DeInitializes the profiler interface
163
+ */
164
+ CUptiResult CUPTIAPI cuptiProfilerDeInitialize(CUpti_Profiler_DeInitialize_Params *pParams);
165
+
166
+ /**
167
+ * \brief Input parameter to define the counterDataImage
168
+ */
169
+ typedef struct CUpti_Profiler_CounterDataImageOptions
170
+ {
171
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
172
+ void* pPriv; //!< [in] assign to NULL
173
+
174
+ const uint8_t* pCounterDataPrefix; /**< [in] Address of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
175
+ Must be align(8).*/
176
+ size_t counterDataPrefixSize; //!< [in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
177
+ uint32_t maxNumRanges; //!< [in] Maximum number of ranges that can be profiled
178
+ uint32_t maxNumRangeTreeNodes; //!< [in] Maximum number of RangeTree nodes; must be >= maxNumRanges
179
+ uint32_t maxRangeNameLength; //!< [in] Maximum string length of each RangeName, including the trailing NUL character
180
+ } CUpti_Profiler_CounterDataImageOptions;
181
+ #define CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImageOptions, maxRangeNameLength)
182
+
183
+ /**
184
+ * \brief Params for cuptiProfilerCounterDataImageCalculateSize
185
+ */
186
+ typedef struct CUpti_Profiler_CounterDataImage_CalculateSize_Params
187
+ {
188
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE
189
+ void* pPriv; //!< [in] assign to NULL
190
+
191
+ size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
192
+ const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options
193
+ size_t counterDataImageSize; //!< [out]
194
+ } CUpti_Profiler_CounterDataImage_CalculateSize_Params;
195
+ #define CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateSize_Params, counterDataImageSize)
196
+
197
+ /**
198
+ * \brief Params for cuptiProfilerCounterDataImageInitialize
199
+ */
200
+ typedef struct CUpti_Profiler_CounterDataImage_Initialize_Params
201
+ {
202
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE
203
+ void* pPriv; //!< [in] assign to NULL
204
+
205
+ size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
206
+ const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options
207
+ size_t counterDataImageSize; //!< [in] Size calculated from cuptiProfilerCounterDataImageCalculateSize
208
+ uint8_t* pCounterDataImage; //!< [in] The buffer to be initialized.
209
+ } CUpti_Profiler_CounterDataImage_Initialize_Params;
210
+ #define CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_Initialize_Params, pCounterDataImage)
211
+
212
+ /**
213
+ * \brief A CounterData image allocates space for values for each counter for each range.
214
+ *
215
+ * The user bears the responsibility of managing the counterDataImage allocations.
216
+ * CounterDataPrefix contains meta data about the metrics that will be stored in counterDataImage.
217
+ * Use these APIs to calculate the allocation size and initialize counterData image.
218
+ */
219
+ CUptiResult CUPTIAPI cuptiProfilerCounterDataImageCalculateSize(CUpti_Profiler_CounterDataImage_CalculateSize_Params* pParams);
220
+ CUptiResult CUPTIAPI cuptiProfilerCounterDataImageInitialize(CUpti_Profiler_CounterDataImage_Initialize_Params* pParams);
221
+
222
+ /**
223
+ * \brief Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize
224
+ */
225
+ typedef struct CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
226
+ {
227
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE
228
+ void* pPriv; //!< [in] assign to NULL
229
+
230
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
231
+ uint8_t* pCounterDataImage; //!< [in]
232
+ size_t counterDataScratchBufferSize; //!< [out]
233
+ } CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params;
234
+ #define CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params, counterDataScratchBufferSize)
235
+
236
+ /**
237
+ * \brief Params for cuptiProfilerCounterDataImageInitializeScratchBuffer
238
+ */
239
+ typedef struct CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
240
+ {
241
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE
242
+ void* pPriv; //!< [in] assign to NULL
243
+
244
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
245
+ uint8_t* pCounterDataImage; //!< [in]
246
+ size_t counterDataScratchBufferSize; //!< [in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize
247
+ uint8_t* pCounterDataScratchBuffer; //!< [in] the scratch buffer to be initialized.
248
+ } CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params;
249
+ #define CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params, pCounterDataScratchBuffer)
250
+
251
+ /**
252
+ * \brief A temporary storage for CounterData image needed for internal operations
253
+ *
254
+ * Use these APIs to calculate the allocation size and initialize counterData image scratch buffer.
255
+ */
256
+ CUptiResult CUPTIAPI cuptiProfilerCounterDataImageCalculateScratchBufferSize(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* pParams);
257
+ CUptiResult CUPTIAPI cuptiProfilerCounterDataImageInitializeScratchBuffer(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params* pParams);
258
+
259
+ /**
260
+ * \brief Params for cuptiProfilerBeginSession
261
+ */
262
+ typedef struct CUpti_Profiler_BeginSession_Params
263
+ {
264
+ size_t structSize; //!< [in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE
265
+ void* pPriv; //!< [in] assign to NULL
266
+
267
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
268
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
269
+ uint8_t* pCounterDataImage; //!< [in] address of CounterDataImage
270
+ size_t counterDataScratchBufferSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateScratchBufferSize
271
+ uint8_t* pCounterDataScratchBuffer; //!< [in] address of CounterDataImage scratch buffer
272
+ uint8_t bDumpCounterDataInFile; //!< [in] [optional]
273
+ const char* pCounterDataFilePath; //!< [in] [optional]
274
+ CUpti_ProfilerRange range; //!< [in] CUpti_ProfilerRange
275
+ CUpti_ProfilerReplayMode replayMode; //!< [in] CUpti_ProfilerReplayMode
276
+ /* Replay options, required when replay is done by cupti user */
277
+ size_t maxRangesPerPass; //!< [in] Maximum number of ranges that can be recorded in a single pass.
278
+ size_t maxLaunchesPerPass; //!< [in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass.
279
+
280
+ } CUpti_Profiler_BeginSession_Params;
281
+ #define CUpti_Profiler_BeginSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginSession_Params, maxLaunchesPerPass)
282
+ /**
283
+ * \brief Params for cuptiProfilerEndSession
284
+ */
285
+ typedef struct CUpti_Profiler_EndSession_Params
286
+ {
287
+ size_t structSize; //!< [in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE
288
+ void* pPriv; //!< [in] assign to NULL
289
+
290
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
291
+ } CUpti_Profiler_EndSession_Params;
292
+ #define CUpti_Profiler_EndSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndSession_Params, ctx)
293
+
294
+ /**
295
+ * \brief Begin profiling session sets up the profiling on the device
296
+ *
297
+ * Although it doesn't start the profiling, the GPU resources needed for profiling are allocated.
298
+ * Outside of a session, the GPU will return to its normal operating state.
299
+ */
300
+ CUptiResult CUPTIAPI cuptiProfilerBeginSession(CUpti_Profiler_BeginSession_Params* pParams);
301
+ /**
302
+ * \brief Ends profiling session
303
+ *
304
+ * Frees up the GPU resources acquired for profiling.
305
+ * Outside of a session, the GPU will return to its normal operating state.
306
+ */
307
+ CUptiResult CUPTIAPI cuptiProfilerEndSession(CUpti_Profiler_EndSession_Params* pParams);
308
+
309
+ /**
310
+ * \brief Params for cuptiProfilerSetConfig
311
+ */
312
+ typedef struct CUpti_Profiler_SetConfig_Params
313
+ {
314
+ size_t structSize; //!< [in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE
315
+ void* pPriv; //!< [in] assign to NULL
316
+
317
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
318
+ const uint8_t* pConfig; //!< [in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be align(8).
319
+ size_t configSize; //!< [in] size of config
320
+ uint16_t minNestingLevel; //!< [in] the lowest nesting level to be profiled; must be >= 1
321
+ uint16_t numNestingLevels; //!< [in] the number of nesting levels to profile; must be >= 1
322
+ size_t passIndex; //!< [in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay
323
+ uint16_t targetNestingLevel; //!< [in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application replay
324
+ } CUpti_Profiler_SetConfig_Params;
325
+
326
+ #define CUpti_Profiler_SetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_SetConfig_Params, targetNestingLevel)
327
+
328
+ /**
329
+ * \brief Params for cuptiProfilerUnsetConfig
330
+ */
331
+ typedef struct CUpti_Profiler_UnsetConfig_Params
332
+ {
333
+ size_t structSize; //!< [in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE
334
+ void* pPriv; //!< [in] assign to NULL
335
+
336
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
337
+ } CUpti_Profiler_UnsetConfig_Params;
338
+ #define CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_UnsetConfig_Params, ctx)
339
+
340
+ /**
341
+ * \brief Set metrics configuration to be profiled
342
+ *
343
+ * Use these APIs to set the config to profile in a session. It can be used for advanced cases such as where multiple
344
+ * configurations are collected into a single CounterData Image on an as-needed basis, without restarting the session.
345
+ */
346
+ CUptiResult CUPTIAPI cuptiProfilerSetConfig(CUpti_Profiler_SetConfig_Params* pParams);
347
+ /**
348
+ * \brief Unset metrics configuration profiled
349
+ *
350
+ */
351
+ CUptiResult CUPTIAPI cuptiProfilerUnsetConfig(CUpti_Profiler_UnsetConfig_Params* pParams);
352
+
353
+ /**
354
+ * \brief Params for cuptiProfilerBeginPass
355
+ */
356
+ typedef struct CUpti_Profiler_BeginPass_Params
357
+ {
358
+ size_t structSize; //!< [in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
359
+ void* pPriv; //!< [in] assign to NULL
360
+
361
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
362
+ } CUpti_Profiler_BeginPass_Params;
363
+ #define CUpti_Profiler_BeginPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginPass_Params, ctx)
364
+
365
+ /**
366
+ * \brief Params for cuptiProfilerEndPass
367
+ */
368
+ typedef struct CUpti_Profiler_EndPass_Params
369
+ {
370
+ size_t structSize; //!< [in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE
371
+ void* pPriv; //!< [in] assign to NULL
372
+
373
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
374
+ uint16_t targetNestingLevel; //!< [out] The targetNestingLevel that will be collected by the *next* BeginPass.
375
+ size_t passIndex; //!< [out] The passIndex that will be collected by the *next* BeginPass
376
+ uint8_t allPassesSubmitted; //!< [out] becomes true when the last pass has been queued to the GPU
377
+ } CUpti_Profiler_EndPass_Params;
378
+ #define CUpti_Profiler_EndPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndPass_Params, allPassesSubmitted)
379
+
380
+ /**
381
+ * \brief Replay API: used for multipass collection.
382
+
383
+ * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay
384
+ * for multipass collection of the metrics configurations.
385
+ * It's a no-op in case of \ref CUPTI_KernelReplay.
386
+ */
387
+ CUptiResult CUPTIAPI cuptiProfilerBeginPass(CUpti_Profiler_BeginPass_Params* pParams);
388
+
389
+ /**
390
+ * \brief Replay API: used for multipass collection.
391
+
392
+ * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay
393
+ * for multipass collection of the metrics configurations.
394
+ * It's a no-op in case of \ref CUPTI_KernelReplay.
395
+ * Returns information for next pass.
396
+ */
397
+ CUptiResult CUPTIAPI cuptiProfilerEndPass(CUpti_Profiler_EndPass_Params* pParams);
398
+
399
+ /**
400
+ * \brief Params for cuptiProfilerEnableProfiling
401
+ */
402
+ typedef struct CUpti_Profiler_EnableProfiling_Params
403
+ {
404
+ size_t structSize; //!< [in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
405
+ void* pPriv; //!< [in] assign to NULL
406
+
407
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
408
+ } CUpti_Profiler_EnableProfiling_Params;
409
+ #define CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EnableProfiling_Params, ctx)
410
+
411
+ /**
412
+ * \brief Params for cuptiProfilerDisableProfiling
413
+ */
414
+ typedef struct CUpti_Profiler_DisableProfiling_Params
415
+ {
416
+ size_t structSize; //!< [in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
417
+ void* pPriv; //!< [in] assign to NULL
418
+
419
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
420
+ } CUpti_Profiler_DisableProfiling_Params;
421
+ #define CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DisableProfiling_Params, ctx)
422
+
423
+ /**
424
+ * \brief Enables Profiling
425
+ *
426
+ * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in
427
+ * a profiling session.
428
+ */
429
+ CUptiResult CUPTIAPI cuptiProfilerEnableProfiling(CUpti_Profiler_EnableProfiling_Params* pParams);
430
+
431
+ /**
432
+ * \brief Disable Profiling
433
+ *
434
+ * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in
435
+ * a profiling session.
436
+ */
437
+ CUptiResult CUPTIAPI cuptiProfilerDisableProfiling(CUpti_Profiler_DisableProfiling_Params* pParams);
438
+
439
+ /**
440
+ * \brief Params for cuptiProfilerIsPassCollected
441
+ */
442
+ typedef struct CUpti_Profiler_IsPassCollected_Params
443
+ {
444
+ size_t structSize; //!< [in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE
445
+ void* pPriv; //!< [in] assign to NULL
446
+
447
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
448
+ size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed pass
449
+ size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full
450
+ uint8_t onePassCollected; //!< [out] true if a pass was successfully decoded
451
+ uint8_t allPassesCollected; //!< [out] becomes true when the last pass has been decoded
452
+ } CUpti_Profiler_IsPassCollected_Params;
453
+ #define CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_IsPassCollected_Params, allPassesCollected)
454
+
455
+ /**
456
+ * \brief Asynchronous call to query if the submitted pass to GPU is collected
457
+ *
458
+ */
459
+ CUptiResult CUPTIAPI cuptiProfilerIsPassCollected(CUpti_Profiler_IsPassCollected_Params* pParams);
460
+
461
+ /**
462
+ * \brief Params for cuptiProfilerFlushCounterData
463
+ */
464
+ typedef struct CUpti_Profiler_FlushCounterData_Params
465
+ {
466
+ size_t structSize; //!< [in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
467
+ void* pPriv; //!< [in] assign to NULL
468
+
469
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
470
+ size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed passes
471
+ size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full
472
+ } CUpti_Profiler_FlushCounterData_Params;
473
+ #define CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_FlushCounterData_Params, numTraceBytesDropped)
474
+
475
+ /**
476
+ * \brief Decode all the submitted passes
477
+ *
478
+ * Flush Counter data API to ensure every pass is decoded into the counterDataImage passed at beginSession.
479
+ * This will cause the CPU/GPU sync to collect all the undecoded passes.
480
+ */
481
+ CUptiResult CUPTIAPI cuptiProfilerFlushCounterData(CUpti_Profiler_FlushCounterData_Params* pParams);
482
+
483
+ typedef struct CUpti_Profiler_PushRange_Params
484
+ {
485
+ size_t structSize; //!< [in] CUpti_Profiler_PushRange_Params_STRUCT_SIZE
486
+ void* pPriv; //!< [in] assign to NULL
487
+
488
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
489
+ const char* pRangeName; //!< [in] specifies the range for subsequent launches; must not be NULL
490
+ size_t rangeNameLength; //!< [in] assign to strlen(pRangeName) if known; if set to zero, the library will call strlen()
491
+ } CUpti_Profiler_PushRange_Params;
492
+ #define CUpti_Profiler_PushRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PushRange_Params, rangeNameLength)
493
+
494
+ typedef struct CUpti_Profiler_PopRange_Params
495
+ {
496
+ size_t structSize; //!< [in] CUpti_Profiler_PopRange_Params_STRUCT_SIZE
497
+ void* pPriv; //!< [in] assign to NULL
498
+
499
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
500
+ } CUpti_Profiler_PopRange_Params;
501
+ #define CUpti_Profiler_PopRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PopRange_Params, ctx)
502
+
503
+
504
+ /**
505
+ * \brief Range API's : Push user range
506
+ *
507
+ * Counter data is collected per unique range-stack, identified by a string label passed by the user.
508
+ * It's an invalid operation in case of \ref CUPTI_AutoRange.
509
+ */
510
+ CUptiResult CUPTIAPI cuptiProfilerPushRange(CUpti_Profiler_PushRange_Params *pParams);
511
+
512
+ /**
513
+ * \brief Range API's : Pop user range
514
+ *
515
+ * Counter data is collected per unique range-stack, identified by a string label passed by the user.
516
+ * It's an invalid operation in case of \ref CUPTI_AutoRange.
517
+ */
518
+ CUptiResult CUPTIAPI cuptiProfilerPopRange(CUpti_Profiler_PopRange_Params *pParams);
519
+
520
+ /**
521
+ * \brief Params for cuptiProfilerGetCounterAvailability
522
+ */
523
+ typedef struct CUpti_Profiler_GetCounterAvailability_Params
524
+ {
525
+ size_t structSize; //!< [in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE
526
+ void* pPriv; //!< [in] assign to NULL
527
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
528
+ size_t counterAvailabilityImageSize; //!< [in/out] If `pCounterAvailabilityImage` is NULL, then the required size is returned in
529
+ //!< `counterAvailabilityImageSize`, otherwise `counterAvailabilityImageSize` should be set to the size of
530
+ //!< `pCounterAvailabilityImage`, and on return it would be overwritten with number of actual bytes copied
531
+ uint8_t* pCounterAvailabilityImage; //!< [in] buffer receiving counter availability image, may be NULL
532
+ } CUpti_Profiler_GetCounterAvailability_Params;
533
+ #define CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_GetCounterAvailability_Params, pCounterAvailabilityImage)
534
+
535
+ /**
536
+ * \brief Query counter availability
537
+ *
538
+ * Use this API to query counter availability information in a buffer which can be used to filter unavailable raw metrics on host.
539
+ * Note: This API may fail, if any profiling or sampling session is active on the specified context or its device.
540
+ */
541
+ CUptiResult CUPTIAPI cuptiProfilerGetCounterAvailability(CUpti_Profiler_GetCounterAvailability_Params *pParams);
542
+
543
+ /// Generic support level enum for CUPTI
544
+ typedef enum
545
+ {
546
+ CUPTI_PROFILER_CONFIGURATION_UNKNOWN = 0, //!< Configuration support level unknown - either detection code errored out before setting this value, or unable to determine it
547
+ CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED, //!< Profiling is unavailable. For specific feature fields, this means that the current configuration of this feature does not work with profiling. For instance, SLI-enabled devices do not support profiling, and this value would be returned for SLI on an SLI-enabled device.
548
+ CUPTI_PROFILER_CONFIGURATION_DISABLED, //!< Profiling would be available for this configuration, but was disabled by the system
549
+ CUPTI_PROFILER_CONFIGURATION_SUPPORTED //!< Profiling is supported. For specific feature fields, this means that the current configuration of this feature works with profiling. For instance, SLI-enabled devices do not support profiling, and this value would only be returned for devices which are not SLI-enabled.
550
+ } CUpti_Profiler_Support_Level;
551
+
552
+ /**
553
+ * \brief Profiler API types
554
+ */
555
+ typedef enum
556
+ {
557
+ CUPTI_PROFILER_RANGE_PROFILING = 0, //!< CUPTI APIs for range based profiling (cuptiProfiler*)
558
+ CUPTI_PROFILER_PC_SAMPLING, //!< CUPTI APIs collecting pc sampling data (cuptiPcSampling*)
559
+ CUPTI_PROFILER_SASS_METRICS, //!< CUPTI APIs collecting SASS metrics data (cuptiSassMetrics*)
560
+ CUPTI_PROFILER_PM_SAMPLING, //!< CUPTI APIs collecting PM Sampling data (cuptiPmSampling*)
561
+ CUPTI_PROFILER_UNKNOWN
562
+ } CUpti_Profiler_API;
563
+
564
+ /**
565
+ * \brief Params for cuptiProfilerDeviceSupported
566
+ */
567
+ typedef struct
568
+ {
569
+ size_t structSize; //!< [in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE
570
+ void *pPriv; //!< [in] assign to NULL
571
+ CUdevice cuDevice; //!< [in] if NULL, the current CUcontext is used
572
+
573
+ CUpti_Profiler_Support_Level isSupported; //!< [out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise.
574
+
575
+ CUpti_Profiler_Support_Level architecture; //!< [out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise
576
+ CUpti_Profiler_Support_Level sli; //!< [out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise
577
+ CUpti_Profiler_Support_Level vGpu; //!< [out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED otherwise
578
+ CUpti_Profiler_Support_Level confidentialCompute; //!< [out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise
579
+ CUpti_Profiler_Support_Level cmp; //!< [out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise
580
+ CUpti_Profiler_Support_Level wsl; //!< [out] SUPPORTED if WSL supported, UNSUPPORTED otherwise
581
+ CUpti_Profiler_API api; //!< [in] the CUPTI API type for which device support will be checked
582
+ } CUpti_Profiler_DeviceSupported_Params;
583
+ #define CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, api)
584
+
585
+ /**
586
+ * \brief Query device compatibility with Profiling API
587
+ *
588
+ * Use this call to determine whether a compute device and configuration are compatible with the Profiling API.
589
+ * If the configuration does not support profiling, one of several flags will indicate why.
590
+ */
591
+ CUptiResult CUPTIAPI cuptiProfilerDeviceSupported(CUpti_Profiler_DeviceSupported_Params *pParams);
592
+
593
+ /** @} */ /* END CUPTI_PROFILER_API */
594
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
595
+ #pragma GCC visibility pop
596
+ #endif
597
+
598
+ #ifdef __cplusplus
599
+ } /* extern "C" */
600
+ #endif
601
+
602
+ #endif /*_CUPTI_PROFILER_TARGET_H_*/
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_range_profiler.h ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_RANGE_PROFILER_H_)
51
+ #define _CUPTI_RANGE_PROFILER_H_
52
+
53
+ #include <cuda.h>
54
+ #include <cupti_result.h>
55
+ #include <cupti_profiler_target.h>
56
+ #include <stddef.h>
57
+ #include <stdint.h>
58
+
59
+ #ifdef __cplusplus
60
+ extern "C" {
61
+ #endif
62
+
63
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
64
+ #pragma GCC visibility push(default)
65
+ #endif
66
+
67
+ /**
68
+ * \defgroup CUPTI_RANGE_PROFILER_API CUPTI Range Profiling API
69
+ * Functions, types, and enums that implement the CUPTI Range Profiling API.
70
+ * @{
71
+ */
72
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
73
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
74
+ #endif
75
+
76
+
77
+ typedef struct CUpti_RangeProfiler_Object CUpti_RangeProfiler_Object;
78
+
79
+ /**
80
+ * \brief Params for cuptiRangeProfilerSetConfig
81
+ */
82
+ typedef struct CUpti_RangeProfiler_SetConfig_Params
83
+ {
84
+ /// [in] Size of the data structure.
85
+ size_t structSize;
86
+ /// [in] Set to NULL.
87
+ void* pPriv;
88
+ /// [in] Range Profiler Object.
89
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
90
+ /// [in] Size of the config image.
91
+ size_t configSize;
92
+ /// [in] Config image.
93
+ const uint8_t* pConfig;
94
+ /// [in] Size of the counter data image.
95
+ size_t counterDataImageSize;
96
+ /// [in] Counter data image.
97
+ uint8_t* pCounterDataImage;
98
+ /// [in] Profiling Range mode.
99
+ CUpti_ProfilerRange range;
100
+ /// [in] Replay mode.
101
+ CUpti_ProfilerReplayMode replayMode;
102
+ /// [in] Maximum number of ranges that can be profiled in a pass.
103
+ size_t maxRangesPerPass;
104
+ /// [in] Number of nesting levels to be profiled. For Auto range mode, this should be set to 1.
105
+ uint16_t numNestingLevels;
106
+ /// [in] minimum nesting level to be profiled.
107
+ uint16_t minNestingLevel;
108
+ /// [in] Pass index for the replay session.
109
+ size_t passIndex;
110
+ /// [in] Target nesting level for the replay session.
111
+ uint16_t targetNestingLevel;
112
+ } CUpti_RangeProfiler_SetConfig_Params;
113
+
114
+ #define CUpti_RangeProfiler_SetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_SetConfig_Params, targetNestingLevel)
115
+
116
+ /**
117
+ * \brief Set the configuration for range profiler like maximum number of ranges per pass, number of nesting levels,
118
+ * range and replay mode and the config image which has scheduling info for metric collection.
119
+ *
120
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_SetConfig_Params
121
+ *
122
+ * \retval CUPTI_SUCCESS
123
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
124
+ */
125
+ CUptiResult CUPTIAPI cuptiRangeProfilerSetConfig(CUpti_RangeProfiler_SetConfig_Params* pParams);
126
+
127
+ /**
128
+ * \brief Params for cuptiRangeProfilerEnable
129
+ */
130
+ typedef struct CUpti_RangeProfiler_Enable_Params
131
+ {
132
+ /// [in] Size of the data structure.
133
+ size_t structSize;
134
+ /// [in] Set to NULL.
135
+ void* pPriv;
136
+ /// [in] Context to be used for profiling.
137
+ CUcontext ctx;
138
+ /// [out] Range Profiler Object.
139
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
140
+ } CUpti_RangeProfiler_Enable_Params;
141
+ #define CUpti_RangeProfiler_Enable_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Enable_Params, pRangeProfilerObject)
142
+
143
+ /**
144
+ * \brief Create a range profiler object and enable range profiling on the CUDA context.
145
+ *
146
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_Enable_Params
147
+ *
148
+ * \retval CUPTI_SUCCESS
149
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
150
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY if memory allocation fails while creating the range profiler object
151
+ * \retval CUPTI_ERROR_INSUFFICIENT_PRIVILEGES if the user does not have sufficient privileges to perform the operation
152
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
153
+ */
154
+ CUptiResult CUPTIAPI cuptiRangeProfilerEnable(CUpti_RangeProfiler_Enable_Params* pParams);
155
+
156
+ /**
157
+ * \brief Params for cuptiRangeProfilerDisable
158
+ */
159
+ typedef struct CUpti_RangeProfiler_Disable_Params
160
+ {
161
+ /// [in] Size of the data structure.
162
+ size_t structSize;
163
+ /// [in] Set to NULL.
164
+ void* pPriv;
165
+ /// [in] Range Profiler Object.
166
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
167
+ } CUpti_RangeProfiler_Disable_Params;
168
+ #define CUpti_RangeProfiler_Disable_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Disable_Params, pRangeProfilerObject)
169
+
170
+ /**
171
+ * \brief Disable the range profiler on the CUDA context and destroy the range profiler object.
172
+ *
173
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_Disable_Params
174
+ *
175
+ * \retval CUPTI_SUCCESS
176
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
177
+ */
178
+ CUptiResult CUPTIAPI cuptiRangeProfilerDisable(CUpti_RangeProfiler_Disable_Params* pParams);
179
+
180
+ /**
181
+ * \brief Params for cuptiRangeProfilerStart
182
+ */
183
+ typedef struct CUpti_RangeProfiler_Start_Params
184
+ {
185
+ /// [in] Size of the data structure.
186
+ size_t structSize;
187
+ /// [in] Set to NULL.
188
+ void* pPriv;
189
+ /// [in] Range Profiler Object.
190
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
191
+ } CUpti_RangeProfiler_Start_Params;
192
+ #define CUpti_RangeProfiler_Start_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Start_Params, pRangeProfilerObject)
193
+
194
+ /**
195
+ * \brief Start the range profiler.
196
+ *
197
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_Start_Params
198
+ *
199
+ * \retval CUPTI_SUCCESS
200
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
201
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler Start is called without enabling range profiler
202
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
203
+ */
204
+ CUptiResult CUPTIAPI cuptiRangeProfilerStart(CUpti_RangeProfiler_Start_Params* pParams);
205
+
206
+ /**
207
+ * \brief Params for cuptiRangeProfilerStop
208
+ */
209
+ typedef struct CUpti_RangeProfiler_Stop_Params
210
+ {
211
+ /// [in] Size of the data structure.
212
+ size_t structSize;
213
+ /// [in] Set to NULL.
214
+ void* pPriv;
215
+ /// [in] Range Profiler Object.
216
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
217
+ /// [out] pass index for the replay session.
218
+ size_t passIndex;
219
+ /// [out] target nesting level for the replay session.
220
+ size_t targetNestingLevel;
221
+ /// [out] 1 if all passes are submitted to GPU for collection, 0 otherwise.
222
+ uint8_t isAllPassSubmitted;
223
+ } CUpti_RangeProfiler_Stop_Params;
224
+ #define CUpti_RangeProfiler_Stop_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Stop_Params, isAllPassSubmitted)
225
+
226
+ /**
227
+ * \brief Stop the range profiler.
228
+ *
229
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_Stop_Params
230
+ *
231
+ * \retval CUPTI_SUCCESS
232
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
233
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler Stop is called without enabling range profiler
234
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
235
+ */
236
+ CUptiResult CUPTIAPI cuptiRangeProfilerStop(CUpti_RangeProfiler_Stop_Params* pParams);
237
+
238
+ /**
239
+ * \brief Params for cuptiRangeProfilerPushRange
240
+ */
241
+ typedef struct CUpti_RangeProfiler_PushRange_Params
242
+ {
243
+ /// [in] Size of the data structure.
244
+ size_t structSize;
245
+ /// [in] Set to NULL.
246
+ void* pPriv;
247
+ /// [in] Range Profiler Object.
248
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
249
+ /// [in] Name of the range to be profiled (only valid for User range mode).
250
+ const char* pRangeName;
251
+ } CUpti_RangeProfiler_PushRange_Params;
252
+ #define CUpti_RangeProfiler_PushRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_PushRange_Params, pRangeName)
253
+
254
+ /**
255
+ * \brief Add a new range to the Range Profiler with a given range name.
256
+ * For nested ranges, this API should be called again for the innermost range. For profiling the nested
257
+ * range, users need to set the values for minNestingLevel and numNestingLevels in the SetConfig API.
258
+ *
259
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_PushRange_Params
260
+ *
261
+ * \retval CUPTI_SUCCESS
262
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
263
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler PushRange is called without enabling range profiler
264
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
265
+ */
266
+ CUptiResult CUPTIAPI cuptiRangeProfilerPushRange(CUpti_RangeProfiler_PushRange_Params* pParams);
267
+
268
+ /**
269
+ * \brief Params for cuptiRangeProfilerPopRange
270
+ */
271
+ typedef struct CUpti_RangeProfiler_PopRange_Params
272
+ {
273
+ /// [in] Size of the data structure.
274
+ size_t structSize;
275
+ /// [in] Set to NULL.
276
+ void* pPriv;
277
+ /// [in] Range Profiler Object.
278
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
279
+ } CUpti_RangeProfiler_PopRange_Params;
280
+ #define CUpti_RangeProfiler_PopRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_PopRange_Params, pRangeProfilerObject)
281
+
282
+ /**
283
+ * \brief Pop the current range from the Range Profiler.
284
+ * The number of pop range API calls should be the same as the number of push range API calls, in the same order.
285
+ *
286
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_PopRange_Params
287
+ *
288
+ * \retval CUPTI_SUCCESS
289
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
290
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler PopRange is called without enabling range profiler
291
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
292
+ */
293
+ CUptiResult CUPTIAPI cuptiRangeProfilerPopRange(CUpti_RangeProfiler_PopRange_Params* pParams);
294
+
295
+ /**
296
+ * \brief Params for cuptiRangeProfilerDecodeData
297
+ */
298
+ typedef struct CUpti_RangeProfiler_DecodeData_Params
299
+ {
300
+ /// [in] Size of the data structure.
301
+ size_t structSize;
302
+ /// [in] Set to NULL.
303
+ void* pPriv;
304
+ /// [in] Range Profiler Object.
305
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
306
+ /// [out] Number of ranges dropped in the processed passes.
307
+ size_t numOfRangeDropped;
308
+ } CUpti_RangeProfiler_DecodeData_Params;
309
+ #define CUpti_RangeProfiler_DecodeData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_DecodeData_Params, numOfRangeDropped)
310
+
311
+ /**
312
+ * \brief Decode the profiling data stored in the hardware to the counter data image passed in the
313
+ * SetConfig API. This API should be called after cuptiRangeProfilerStop. The counter data image
314
+ * will be updated with the profiling data for the ranges profiled.
315
+ *
316
+ * For the cases where the number of ranges the counter data image can store is less than the number of ranges
317
+ * profiled (= maxRangesPerPass in SetConfig API), the counter data image will report dropped ranges.
318
+ *
319
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_DecodeData_Params
320
+ *
321
+ * \retval CUPTI_SUCCESS
322
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
323
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler DecodeData is called without enabling range profiler
324
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
325
+ */
326
+ CUptiResult CUPTIAPI cuptiRangeProfilerDecodeData(CUpti_RangeProfiler_DecodeData_Params* pParams);
327
+
328
+ /**
329
+ * \brief Params for cuptiRangeProfilerGetCounterDataSize
330
+ */
331
+ typedef struct CUpti_RangeProfiler_GetCounterDataSize_Params
332
+ {
333
+ /// [in] Size of the data structure.
334
+ size_t structSize;
335
+ /// [in] Set to NULL.
336
+ void* pPriv;
337
+ /// [in] Range Profiler Object.
338
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
339
+ /// [in] Names of the metrics to be collected.
340
+ const char** pMetricNames;
341
+ /// [in] Number of metrics to be collected.
342
+ size_t numMetrics;
343
+ /// [in] Maximum number of ranges to be stored in the counter data image.
344
+ size_t maxNumOfRanges;
345
+ /// [in] Maximum number of RangeTree nodes; must be >= maxNumOfRanges
346
+ uint32_t maxNumRangeTreeNodes;
347
+ /// [out] Size of the counter data image.
348
+ size_t counterDataSize;
349
+ } CUpti_RangeProfiler_GetCounterDataSize_Params;
350
+ #define CUpti_RangeProfiler_GetCounterDataSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataSize_Params, counterDataSize)
351
+
352
+ /**
353
+ * \brief Get the size of the counter data image required to store the profiling data for the ranges profiled.
354
+ *
355
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_GetCounterDataSize_Params
356
+ *
357
+ * \retval CUPTI_SUCCESS
358
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
359
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler GetCounterDataSize is called without enabling range profiler
360
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
361
+ */
362
+ CUptiResult CUPTIAPI cuptiRangeProfilerGetCounterDataSize(CUpti_RangeProfiler_GetCounterDataSize_Params* pParams);
363
+
364
+ /**
365
+ * \brief Params for cuptiRangeProfilerCounterDataImageInitialize
366
+ */
367
+ typedef struct CUpti_RangeProfiler_CounterDataImage_Initialize_Params
368
+ {
369
+ /// [in] Size of the data structure.
370
+ size_t structSize;
371
+ /// [in] Set to NULL.
372
+ void* pPriv;
373
+ /// [in] Range Profiler Object.
374
+ CUpti_RangeProfiler_Object* pRangeProfilerObject;
375
+ /// [in] Size of the counter data image.
376
+ size_t counterDataSize;
377
+ /// [in] Counter data image.
378
+ uint8_t* pCounterData;
379
+ } CUpti_RangeProfiler_CounterDataImage_Initialize_Params;
380
+ #define CUpti_RangeProfiler_CounterDataImage_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterDataImage_Initialize_Params, pCounterData)
381
+
382
+ /**
383
+ * \brief Initialize the counter data image with the profiling data for the ranges profiled.
384
+ *
385
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_CounterDataImage_Initialize_Params
386
+ *
387
+ * \retval CUPTI_SUCCESS
388
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
389
+ * \retval CUPTI_ERROR_INVALID_OPERATION if range profiler CounterDataImageInitialize is called without enabling range profiler
390
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
391
+ */
392
+ CUptiResult CUPTIAPI cuptiRangeProfilerCounterDataImageInitialize(CUpti_RangeProfiler_CounterDataImage_Initialize_Params* pParams);
393
+
394
+ /**
395
+ * \brief Params for cuptiRangeProfilerGetCounterDataInfo
396
+ */
397
+ typedef struct CUpti_RangeProfiler_GetCounterDataInfo_Params
398
+ {
399
+ /// [in] Size of the data structure.
400
+ size_t structSize;
401
+ /// [in] Set to NULL.
402
+ void* pPriv;
403
+ /// [in] Counter data image.
404
+ const uint8_t* pCounterDataImage;
405
+ /// [in] Size of the counter data image.
406
+ size_t counterDataImageSize;
407
+ /// [out] Number of ranges in the counter data image.
408
+ size_t numTotalRanges;
409
+ } CUpti_RangeProfiler_GetCounterDataInfo_Params;
410
+ #define CUpti_RangeProfiler_GetCounterDataInfo_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataInfo_Params, numTotalRanges)
411
+
412
+ /**
413
+ * \brief Get the number of ranges stored in the counter data image.
414
+ *
415
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_GetCounterDataInfo_Params
416
+ *
417
+ * \retval CUPTI_SUCCESS
418
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
419
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
420
+ */
421
+ CUptiResult CUPTIAPI cuptiRangeProfilerGetCounterDataInfo(CUpti_RangeProfiler_GetCounterDataInfo_Params* pParams);
422
+
423
+ /**
424
+ * \brief Params for cuptiRangeProfilerCounterDataGetRangeInfo
425
+ */
426
+ typedef struct CUpti_RangeProfiler_CounterData_GetRangeInfo_Params
427
+ {
428
+ /// [in] Size of the data structure.
429
+ size_t structSize;
430
+ /// [in] Set to NULL.
431
+ void* pPriv;
432
+ /// [in] Counter data image.
433
+ const uint8_t* pCounterDataImage;
434
+ /// [in] Size of the counter data image.
435
+ size_t counterDataImageSize;
436
+ /// [in] Index of the range.
437
+ size_t rangeIndex;
438
+ /// [in] range delimiter.
439
+ const char* rangeDelimiter;
440
+ /// [out] Name of the range at the given range index.
441
+ const char* rangeName;
442
+ } CUpti_RangeProfiler_CounterData_GetRangeInfo_Params;
443
+ #define CUpti_RangeProfiler_CounterData_GetRangeInfo_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params, rangeName)
444
+
445
+ /**
446
+ * \brief Get the range name for the given range index.
447
+ *
448
+ * \param pParams A pointer to \ref CUpti_RangeProfiler_CounterData_GetRangeInfo_Params
449
+ *
450
+ * \retval CUPTI_SUCCESS
451
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
452
+ * \retval CUPTI_ERROR_UNKNOWN for any internal error
453
+ */
454
+ CUptiResult CUPTIAPI cuptiRangeProfilerCounterDataGetRangeInfo(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params* pParams);
455
+
456
+ /** @} */ /* END CUPTI_RANGE_PROFILER_API */
457
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
458
+ #pragma GCC visibility pop
459
+ #endif
460
+
461
+ #ifdef __cplusplus
462
+ } /* extern "C" */
463
+ #endif
464
+
465
+ #endif /*_CUPTI_RANGE_PROFILER_H_*/
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_result.h ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_RESULT_H_)
51
+ #define _CUPTI_RESULT_H_
52
+
53
+ #ifndef CUPTIAPI
54
+ #ifdef _WIN32
55
+ #define CUPTIAPI __stdcall
56
+ #else
57
+ #define CUPTIAPI
58
+ #endif
59
+ #endif
60
+
61
+ #if defined(__cplusplus)
62
+ extern "C" {
63
+ #endif
64
+
65
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
66
+ #pragma GCC visibility push(default)
67
+ #endif
68
+
69
+ /**
70
+ * \defgroup CUPTI_RESULT_API CUPTI Result Codes
71
+ * Error and result codes returned by CUPTI functions.
72
+ * @{
73
+ */
74
+
75
+ /**
76
+ * \brief CUPTI result codes.
77
+ *
78
+ * Error and result codes returned by CUPTI functions.
79
+ */
80
+ typedef enum {
81
+ /**
82
+ * No error.
83
+ */
84
+ CUPTI_SUCCESS = 0,
85
+ /**
86
+ * One or more of the parameters is invalid.
87
+ */
88
+ CUPTI_ERROR_INVALID_PARAMETER = 1,
89
+ /**
90
+ * The device does not correspond to a valid CUDA device.
91
+ */
92
+ CUPTI_ERROR_INVALID_DEVICE = 2,
93
+ /**
94
+ * The context is NULL or not valid.
95
+ */
96
+ CUPTI_ERROR_INVALID_CONTEXT = 3,
97
+ /**
98
+ * The event domain id is invalid.
99
+ */
100
+ CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID = 4,
101
+ /**
102
+ * The event id is invalid.
103
+ */
104
+ CUPTI_ERROR_INVALID_EVENT_ID = 5,
105
+ /**
106
+ * The event name is invalid.
107
+ */
108
+ CUPTI_ERROR_INVALID_EVENT_NAME = 6,
109
+ /**
110
+ * The current operation cannot be performed due to dependency on
111
+ * other factors.
112
+ */
113
+ CUPTI_ERROR_INVALID_OPERATION = 7,
114
+ /**
115
+ * Unable to allocate enough memory to perform the requested
116
+ * operation.
117
+ */
118
+ CUPTI_ERROR_OUT_OF_MEMORY = 8,
119
+ /**
120
+ * An error occurred on the performance monitoring hardware.
121
+ */
122
+ CUPTI_ERROR_HARDWARE = 9,
123
+ /**
124
+ * The output buffer size is not sufficient to return all
125
+ * requested data.
126
+ */
127
+ CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT = 10,
128
+ /**
129
+ * API is not implemented.
130
+ */
131
+ CUPTI_ERROR_API_NOT_IMPLEMENTED = 11,
132
+ /**
133
+ * The maximum limit is reached.
134
+ */
135
+ CUPTI_ERROR_MAX_LIMIT_REACHED = 12,
136
+ /**
137
+ * The object is not yet ready to perform the requested operation.
138
+ */
139
+ CUPTI_ERROR_NOT_READY = 13,
140
+ /**
141
+ * The current operation is not compatible with the current state
142
+ * of the object
143
+ */
144
+ CUPTI_ERROR_NOT_COMPATIBLE = 14,
145
+ /**
146
+ * CUPTI is unable to initialize its connection to the CUDA
147
+ * driver.
148
+ */
149
+ CUPTI_ERROR_NOT_INITIALIZED = 15,
150
+ /**
151
+ * The metric id is invalid.
152
+ */
153
+ CUPTI_ERROR_INVALID_METRIC_ID = 16,
154
+ /**
155
+ * The metric name is invalid.
156
+ */
157
+ CUPTI_ERROR_INVALID_METRIC_NAME = 17,
158
+ /**
159
+ * The queue is empty.
160
+ */
161
+ CUPTI_ERROR_QUEUE_EMPTY = 18,
162
+ /**
163
+ * Invalid handle (internal?).
164
+ */
165
+ CUPTI_ERROR_INVALID_HANDLE = 19,
166
+ /**
167
+ * Invalid stream.
168
+ */
169
+ CUPTI_ERROR_INVALID_STREAM = 20,
170
+ /**
171
+ * Invalid kind.
172
+ */
173
+ CUPTI_ERROR_INVALID_KIND = 21,
174
+ /**
175
+ * Invalid event value.
176
+ */
177
+ CUPTI_ERROR_INVALID_EVENT_VALUE = 22,
178
+ /**
179
+ * CUPTI is disabled due to conflicts with other enabled profilers
180
+ */
181
+ CUPTI_ERROR_DISABLED = 23,
182
+ /**
183
+ * Invalid module.
184
+ */
185
+ CUPTI_ERROR_INVALID_MODULE = 24,
186
+ /**
187
+ * Invalid metric value.
188
+ */
189
+ CUPTI_ERROR_INVALID_METRIC_VALUE = 25,
190
+ /**
191
+ * The performance monitoring hardware is in use by other client.
192
+ */
193
+ CUPTI_ERROR_HARDWARE_BUSY = 26,
194
+ /**
195
+ * The attempted operation is not supported on the current
196
+ * system or device.
197
+ */
198
+ CUPTI_ERROR_NOT_SUPPORTED = 27,
199
+ /**
200
+ * Unified memory profiling is not supported on the system.
201
+ * Potential reason could be unsupported OS or architecture.
202
+ */
203
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED = 28,
204
+ /**
205
+ * Unified memory profiling is not supported on the device
206
+ */
207
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE = 29,
208
+ /**
209
+ * Unified memory profiling is not supported on a multi-GPU
210
+ * configuration without P2P support between any pair of devices
211
+ */
212
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES = 30,
213
+ /**
214
+ * Unified memory profiling is not supported under the
215
+ * Multi-Process Service (MPS) environment. CUDA 7.5 removes this
216
+ * restriction.
217
+ */
218
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS = 31,
219
+ /**
220
+ * In CUDA 9.0, devices with compute capability 7.0 don't
221
+ * support CDP tracing
222
+ */
223
+ CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED = 32,
224
+ /**
225
+ * Profiling on virtualized GPU is not supported.
226
+ */
227
+ CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 33,
228
+ /**
229
+ * Profiling results might be incorrect for CUDA applications
230
+ * compiled with nvcc version older than 9.0 for devices with
231
+ * compute capability 6.0 and 6.1.
232
+ * Profiling session will continue and CUPTI will notify it using this error code.
233
+ * User is advised to recompile the application code with nvcc version 9.0 or later.
234
+ * Ignore this warning if code is already compiled with the recommended nvcc version.
235
+ */
236
+ CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE = 34,
237
+ /**
238
+ * User doesn't have sufficient privileges which are required to
239
+ * start the profiling session.
240
+ * One possible reason for this may be that the NVIDIA driver or your system
241
+ * administrator may have restricted access to the NVIDIA GPU performance counters.
242
+ * To learn how to resolve this issue and find more information, please visit
243
+ * https://developer.nvidia.com/CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
244
+ */
245
+ CUPTI_ERROR_INSUFFICIENT_PRIVILEGES = 35,
246
+ /**
247
+ * Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
248
+ * metric API from the header cupti_metrics.h are not compatible with the
249
+ * Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
250
+ * in the headers nvperf_host.h and nvperf_target.h.
251
+ */
252
+ CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED = 36,
253
+ /**
254
+ * Missing definition of the OpenACC API routine in the linked OpenACC library.
255
+ *
256
+ * One possible reason is that OpenACC library is linked statically in the
257
+ * user application, which might not have the definition of all the OpenACC
258
+ * API routines needed for the OpenACC profiling, as compiler might ignore
259
+ * definitions for the functions not used in the application. This issue
260
+ * can be mitigated by linking the OpenACC library dynamically.
261
+ */
262
+ CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE = 37,
263
+ /**
264
+ * Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
265
+ * metric API from the header cupti_metrics.h are not supported on devices with
266
+ * compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
267
+ * These APIs are deprecated in the CUDA 12.8 release and will be removed in a future CUDA release.
268
+ * These are replaced by the host profiling API in the header cupti_profiler_host.h and
269
+ * target profiling API in the header cupti_range_profiler.h which are supported on
270
+ * devices with compute capability 7.0 and higher (i.e. Volta and later GPU
271
+ * architectures).
272
+ */
273
+ CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED = 38,
274
+ /**
275
+ * CUPTI doesn't allow multiple callback subscribers. Only a single subscriber
276
+ * can be registered at a time.
277
+ * Same error code is used when application is launched using NVIDIA tools
278
+ * like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb and
279
+ * cuda-memcheck.
280
+ */
281
+ CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 39,
282
+ /**
283
+ * Profiling on virtualized GPU is not allowed by hypervisor.
284
+ */
285
+ CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES = 40,
286
+ /**
287
+ * Profiling and tracing are not allowed when confidential computing mode
288
+ * is enabled.
289
+ */
290
+ CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED = 41,
291
+ /**
292
+ * CUPTI does not support NVIDIA Crypto Mining Processors (CMP).
293
+ * For more information, please visit https://developer.nvidia.com/ERR_NVCMPGPU
294
+ */
295
+ CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED = 42,
296
+ /**
297
+ * Profiling on Multi-instance GPU (MIG) is not supported.
298
+ */
299
+ CUPTI_ERROR_MIG_DEVICE_NOT_SUPPORTED = 43,
300
+ /**
301
+ * Profiling on SLI device is not supported.
302
+ */
303
+ CUPTI_ERROR_SLI_DEVICE_NOT_SUPPORTED = 44,
304
+ /**
305
+ * Profiling on WSL device is not supported.
306
+ */
307
+ CUPTI_ERROR_WSL_DEVICE_NOT_SUPPORTED = 45,
308
+ /**
309
+ * An unknown internal error has occurred.
310
+ */
311
+ CUPTI_ERROR_UNKNOWN = 999,
312
+ CUPTI_ERROR_FORCE_INT = 0x7fffffff
313
+ } CUptiResult;
314
+
315
+ /**
316
+ * \brief Get the descriptive string for a CUptiResult.
317
+ *
318
+ * Return the descriptive string for a CUptiResult in \p *str.
319
+ * \note \b Thread-safety: this function is thread safe.
320
+ *
321
+ * \param result The result to get the string for
322
+ * \param str Returns the string
323
+ *
324
+ * \retval CUPTI_SUCCESS on success
325
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p str is NULL or \p
326
+ * result is not a valid CUptiResult
327
+ */
328
+ CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, const char **str);
329
+
330
+ /**
331
+ * @brief Get the descriptive message corresponding to error codes returned
332
+ * by CUPTI.
333
+ *
334
+ * Return the descriptive error message for a CUptiResult in \p *str.
335
+ * \note \b Thread-safety: this function is thread safe.
336
+ *
337
+ * \param result The result to get the descriptive error message for
338
+ * \param str Returns the error message string
339
+ *
340
+ * \retval CUPTI_SUCCESS on success
341
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p str is NULL or \p
342
+ * result is not a valid CUptiResult
343
+ *
344
+ */
345
+
346
+ CUptiResult CUPTIAPI cuptiGetErrorMessage(CUptiResult result, const char **str);
347
+
348
+ /** @} */ /* END CUPTI_RESULT_API */
349
+
350
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
351
+ #pragma GCC visibility pop
352
+ #endif
353
+
354
+ #if defined(__cplusplus)
355
+ }
356
+ #endif
357
+
358
+ #endif /*_CUPTI_RESULT_H_*/
359
+
360
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // *************************************************************************
3
+ // Definitions of indices for API functions, unique across entire API
4
+ // *************************************************************************
5
+
6
+ // This file is generated. Any changes you make will be lost during the next clean build.
7
+ // CUDA public interface, for type definitions and cu* function prototypes
8
+
9
+ #if !defined(_CUPTI_RUNTIME_CBID_H)
10
+ #define _CUPTI_RUNTIME_CBID_H
11
+
12
+ typedef enum CUpti_runtime_api_trace_cbid_enum {
13
+ CUPTI_RUNTIME_TRACE_CBID_INVALID = 0,
14
+ CUPTI_RUNTIME_TRACE_CBID_cudaDriverGetVersion_v3020 = 1,
15
+ CUPTI_RUNTIME_TRACE_CBID_cudaRuntimeGetVersion_v3020 = 2,
16
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceCount_v3020 = 3,
17
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v3020 = 4,
18
+ CUPTI_RUNTIME_TRACE_CBID_cudaChooseDevice_v3020 = 5,
19
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetChannelDesc_v3020 = 6,
20
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateChannelDesc_v3020 = 7,
21
+ CUPTI_RUNTIME_TRACE_CBID_cudaConfigureCall_v3020 = 8,
22
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetupArgument_v3020 = 9,
23
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 = 10,
24
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeekAtLastError_v3020 = 11,
25
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorString_v3020 = 12,
26
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020 = 13,
27
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetCacheConfig_v3020 = 14,
28
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetAttributes_v3020 = 15,
29
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 = 16,
30
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 = 17,
31
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetValidDevices_v3020 = 18,
32
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDeviceFlags_v3020 = 19,
33
+ CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020 = 20,
34
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocPitch_v3020 = 21,
35
+ CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020 = 22,
36
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocArray_v3020 = 23,
37
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeArray_v3020 = 24,
38
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocHost_v3020 = 25,
39
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeHost_v3020 = 26,
40
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostAlloc_v3020 = 27,
41
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostGetDevicePointer_v3020 = 28,
42
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostGetFlags_v3020 = 29,
43
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemGetInfo_v3020 = 30,
44
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020 = 31,
45
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_v3020 = 32,
46
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_v3020 = 33,
47
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_v3020 = 34,
48
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_v3020 = 35,
49
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_v3020 = 36,
50
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_v3020 = 37,
51
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_v3020 = 38,
52
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_v3020 = 39,
53
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_v3020 = 40,
54
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020 = 41,
55
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_v3020 = 42,
56
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_v3020 = 43,
57
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_v3020 = 44,
58
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_v3020 = 45,
59
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_v3020 = 46,
60
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_v3020 = 47,
61
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_v3020 = 48,
62
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020 = 49,
63
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_v3020 = 50,
64
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020 = 51,
65
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_v3020 = 52,
66
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolAddress_v3020 = 53,
67
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolSize_v3020 = 54,
68
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture_v3020 = 55,
69
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture2D_v3020 = 56,
70
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToArray_v3020 = 57,
71
+ CUPTI_RUNTIME_TRACE_CBID_cudaUnbindTexture_v3020 = 58,
72
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureAlignmentOffset_v3020 = 59,
73
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureReference_v3020 = 60,
74
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindSurfaceToArray_v3020 = 61,
75
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceReference_v3020 = 62,
76
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLSetGLDevice_v3020 = 63,
77
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLRegisterBufferObject_v3020 = 64,
78
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObject_v3020 = 65,
79
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObject_v3020 = 66,
80
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLUnregisterBufferObject_v3020 = 67,
81
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLSetBufferObjectMapFlags_v3020 = 68,
82
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObjectAsync_v3020 = 69,
83
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObjectAsync_v3020 = 70,
84
+ CUPTI_RUNTIME_TRACE_CBID_cudaWGLGetDevice_v3020 = 71,
85
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterImage_v3020 = 72,
86
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterBuffer_v3020 = 73,
87
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnregisterResource_v3020 = 74,
88
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceSetMapFlags_v3020 = 75,
89
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsMapResources_v3020 = 76,
90
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnmapResources_v3020 = 77,
91
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedPointer_v3020 = 78,
92
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsSubResourceGetMappedArray_v3020 = 79,
93
+ CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUGetDevice_v3020 = 80,
94
+ CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUSetVDPAUDevice_v3020 = 81,
95
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterVideoSurface_v3020 = 82,
96
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterOutputSurface_v3020 = 83,
97
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevice_v3020 = 84,
98
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevices_v3020 = 85,
99
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11SetDirect3DDevice_v3020 = 86,
100
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D11RegisterResource_v3020 = 87,
101
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevice_v3020 = 88,
102
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevices_v3020 = 89,
103
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10SetDirect3DDevice_v3020 = 90,
104
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D10RegisterResource_v3020 = 91,
105
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10RegisterResource_v3020 = 92,
106
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnregisterResource_v3020 = 93,
107
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10MapResources_v3020 = 94,
108
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnmapResources_v3020 = 95,
109
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceSetMapFlags_v3020 = 96,
110
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetSurfaceDimensions_v3020 = 97,
111
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedArray_v3020 = 98,
112
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPointer_v3020 = 99,
113
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedSize_v3020 = 100,
114
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPitch_v3020 = 101,
115
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevice_v3020 = 102,
116
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevices_v3020 = 103,
117
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9SetDirect3DDevice_v3020 = 104,
118
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDirect3DDevice_v3020 = 105,
119
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D9RegisterResource_v3020 = 106,
120
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterResource_v3020 = 107,
121
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterResource_v3020 = 108,
122
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapResources_v3020 = 109,
123
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapResources_v3020 = 110,
124
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceSetMapFlags_v3020 = 111,
125
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetSurfaceDimensions_v3020 = 112,
126
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedArray_v3020 = 113,
127
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPointer_v3020 = 114,
128
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedSize_v3020 = 115,
129
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPitch_v3020 = 116,
130
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9Begin_v3020 = 117,
131
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9End_v3020 = 118,
132
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterVertexBuffer_v3020 = 119,
133
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterVertexBuffer_v3020 = 120,
134
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapVertexBuffer_v3020 = 121,
135
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapVertexBuffer_v3020 = 122,
136
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadExit_v3020 = 123,
137
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForDevice_v3020 = 124,
138
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForHost_v3020 = 125,
139
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadSynchronize_v3020 = 126,
140
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetLimit_v3020 = 127,
141
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetLimit_v3020 = 128,
142
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreate_v3020 = 129,
143
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v3020 = 130,
144
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020 = 131,
145
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_v3020 = 132,
146
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 = 133,
147
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 = 134,
148
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 = 135,
149
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 = 136,
150
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020 = 137,
151
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020 = 138,
152
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v3020 = 139,
153
+ CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3D_v3020 = 140,
154
+ CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3DArray_v3020 = 141,
155
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_v3020 = 142,
156
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_v3020 = 143,
157
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_v3020 = 144,
158
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_v3020 = 145,
159
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetCacheConfig_v3020 = 146,
160
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020 = 147,
161
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDirect3DDevice_v3020 = 148,
162
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDirect3DDevice_v3020 = 149,
163
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetCacheConfig_v3020 = 150,
164
+ CUPTI_RUNTIME_TRACE_CBID_cudaPointerGetAttributes_v4000 = 151,
165
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostRegister_v4000 = 152,
166
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostUnregister_v4000 = 153,
167
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceCanAccessPeer_v4000 = 154,
168
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceEnablePeerAccess_v4000 = 155,
169
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceDisablePeerAccess_v4000 = 156,
170
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeerRegister_v4000 = 157,
171
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeerUnregister_v4000 = 158,
172
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeerGetDevicePointer_v4000 = 159,
173
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeer_v4000 = 160,
174
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeerAsync_v4000 = 161,
175
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_v4000 = 162,
176
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_v4000 = 163,
177
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceReset_v3020 = 164,
178
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020 = 165,
179
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetLimit_v3020 = 166,
180
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetLimit_v3020 = 167,
181
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetCacheConfig_v3020 = 168,
182
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetCacheConfig_v3020 = 169,
183
+ CUPTI_RUNTIME_TRACE_CBID_cudaProfilerInitialize_v4000 = 170,
184
+ CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStart_v4000 = 171,
185
+ CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStop_v4000 = 172,
186
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetByPCIBusId_v4010 = 173,
187
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetPCIBusId_v4010 = 174,
188
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLGetDevices_v4010 = 175,
189
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetEventHandle_v4010 = 176,
190
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenEventHandle_v4010 = 177,
191
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetMemHandle_v4010 = 178,
192
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenMemHandle_v4010 = 179,
193
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcCloseMemHandle_v4010 = 180,
194
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetInfo_v4010 = 181,
195
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetSharedMemConfig_v4020 = 182,
196
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetSharedMemConfig_v4020 = 183,
197
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetSharedMemConfig_v4020 = 184,
198
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v5000 = 185,
199
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroyTextureObject_v5000 = 186,
200
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceDesc_v5000 = 187,
201
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v5000 = 188,
202
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateSurfaceObject_v5000 = 189,
203
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroySurfaceObject_v5000 = 190,
204
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceObjectResourceDesc_v5000 = 191,
205
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocMipmappedArray_v5000 = 192,
206
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetMipmappedArrayLevel_v5000 = 193,
207
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeMipmappedArray_v5000 = 194,
208
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToMipmappedArray_v5000 = 195,
209
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedMipmappedArray_v5000 = 196,
210
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_v5000 = 197,
211
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithFlags_v5000 = 198,
212
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceViewDesc_v5000 = 199,
213
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetAttribute_v5000 = 200,
214
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v5050 = 201,
215
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithPriority_v5050 = 202,
216
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_v5050 = 203,
217
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_v5050 = 204,
218
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetStreamPriorityRange_v5050 = 205,
219
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocManaged_v6000 = 206,
220
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207,
221
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_v6000 = 208,
222
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorName_v6050 = 209,
223
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210,
224
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 = 211,
225
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceFlags_v7000 = 212,
226
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_ptsz_v7000 = 213,
227
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000 = 214,
228
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_ptds_v7000 = 215,
229
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_ptds_v7000 = 216,
230
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_ptds_v7000 = 217,
231
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_ptds_v7000 = 218,
232
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_ptds_v7000 = 219,
233
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_ptds_v7000 = 220,
234
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_ptds_v7000 = 221,
235
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_ptds_v7000 = 222,
236
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_ptds_v7000 = 223,
237
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_ptds_v7000 = 224,
238
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_ptsz_v7000 = 225,
239
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_ptsz_v7000 = 226,
240
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_ptsz_v7000 = 227,
241
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_ptsz_v7000 = 228,
242
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_ptsz_v7000 = 229,
243
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_ptsz_v7000 = 230,
244
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_ptsz_v7000 = 231,
245
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_ptsz_v7000 = 232,
246
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset_ptds_v7000 = 233,
247
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_ptds_v7000 = 234,
248
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_ptsz_v7000 = 235,
249
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_ptsz_v7000 = 236,
250
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_ptsz_v7000 = 237,
251
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_ptsz_v7000 = 238,
252
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_ptsz_v7000 = 239,
253
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_ptsz_v7000 = 240,
254
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_ptsz_v7000 = 241,
255
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_ptsz_v7000 = 242,
256
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_ptds_v7000 = 243,
257
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_ptsz_v7000 = 244,
258
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_ptds_v7000 = 245,
259
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_ptsz_v7000 = 246,
260
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_ptsz_v7000 = 247,
261
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_ptsz_v7000 = 248,
262
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_ptds_v7000 = 249,
263
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_ptsz_v7000 = 250,
264
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000 = 251,
265
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v8000 = 252,
266
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_ptsz_v8000 = 253,
267
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v8000 = 254,
268
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetP2PAttribute_v8000 = 255,
269
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsEGLRegisterImage_v7000 = 256,
270
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnect_v7000 = 257,
271
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerDisconnect_v7000 = 258,
272
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerAcquireFrame_v7000 = 259,
273
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerReleaseFrame_v7000 = 260,
274
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerConnect_v7000 = 261,
275
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerDisconnect_v7000 = 262,
276
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerPresentFrame_v7000 = 263,
277
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerReturnFrame_v7000 = 264,
278
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedEglFrame_v7000 = 265,
279
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttribute_v8000 = 266,
280
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttributes_v8000 = 267,
281
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnectWithFlags_v7000 = 268,
282
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000 = 269,
283
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_ptsz_v9000 = 270,
284
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateFromEGLSync_v9000 = 271,
285
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000 = 272,
286
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetAttribute_v9000 = 273,
287
+ CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalMemory_v10000 = 274,
288
+ CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedBuffer_v10000 = 275,
289
+ CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedMipmappedArray_v10000 = 276,
290
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalMemory_v10000 = 277,
291
+ CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalSemaphore_v10000 = 278,
292
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v10000 = 279,
293
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_ptsz_v10000 = 280,
294
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v10000 = 281,
295
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_ptsz_v10000 = 282,
296
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalSemaphore_v10000 = 283,
297
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_v10000 = 284,
298
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_ptsz_v10000 = 285,
299
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphCreate_v10000 = 286,
300
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetParams_v10000 = 287,
301
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetParams_v10000 = 288,
302
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddKernelNode_v10000 = 289,
303
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode_v10000 = 290,
304
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeGetParams_v10000 = 291,
305
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams_v10000 = 292,
306
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemsetNode_v10000 = 293,
307
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeGetParams_v10000 = 294,
308
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeSetParams_v10000 = 295,
309
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddHostNode_v10000 = 296,
310
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeGetParams_v10000 = 297,
311
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddChildGraphNode_v10000 = 298,
312
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphChildGraphNodeGetGraph_v10000 = 299,
313
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEmptyNode_v10000 = 300,
314
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphClone_v10000 = 301,
315
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeFindInClone_v10000 = 302,
316
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetType_v10000 = 303,
317
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetRootNodes_v10000 = 304,
318
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v10000 = 305,
319
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v10000 = 306,
320
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v10000 = 307,
321
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v10000 = 308,
322
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroyNode_v10000 = 309,
323
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v10000 = 310,
324
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_v10000 = 311,
325
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000 = 312,
326
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecDestroy_v10000 = 313,
327
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroy_v10000 = 314,
328
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_v10000 = 315,
329
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_ptsz_v10000 = 316,
330
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_v10000 = 317,
331
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_ptsz_v10000 = 318,
332
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_v10000 = 319,
333
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_ptsz_v10000 = 320,
334
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeSetParams_v10000 = 321,
335
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetNodes_v10000 = 322,
336
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v10000 = 323,
337
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v10010 = 324,
338
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_ptsz_v10010 = 325,
339
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecKernelNodeSetParams_v10010 = 326,
340
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadExchangeStreamCaptureMode_v10010 = 327,
341
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetNvSciSyncAttributes_v10020 = 328,
342
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyAvailableDynamicSMemPerBlock_v10200 = 329,
343
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_v10200 = 330,
344
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_ptsz_v10200 = 331,
345
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams_v10020 = 332,
346
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemsetNodeSetParams_v10020 = 333,
347
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecHostNodeSetParams_v10020 = 334,
348
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecUpdate_v10020 = 335,
349
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetFuncBySymbol_v11000 = 336,
350
+ CUPTI_RUNTIME_TRACE_CBID_cudaCtxResetPersistingL2Cache_v11000 = 337,
351
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeCopyAttributes_v11000 = 338,
352
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetAttribute_v11000 = 339,
353
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetAttribute_v11000 = 340,
354
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_v11000 = 341,
355
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_ptsz_v11000 = 342,
356
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_v11000 = 343,
357
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_ptsz_v11000 = 344,
358
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_v11000 = 345,
359
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_ptsz_v11000 = 346,
360
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetTexture1DLinearMaxWidth_v11010 = 347,
361
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_v10000 = 348,
362
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_ptsz_v10000 = 349,
363
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeToSymbol_v11010 = 350,
364
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeFromSymbol_v11010 = 351,
365
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode1D_v11010 = 352,
366
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsToSymbol_v11010 = 353,
367
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsFromSymbol_v11010 = 354,
368
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams1D_v11010 = 355,
369
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010 = 356,
370
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010 = 357,
371
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams1D_v11010 = 358,
372
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetSparseProperties_v11010 = 359,
373
+ CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetSparseProperties_v11010 = 360,
374
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecChildGraphNodeSetParams_v11010 = 361,
375
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventRecordNode_v11010 = 362,
376
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeGetEvent_v11010 = 363,
377
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeSetEvent_v11010 = 364,
378
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventWaitNode_v11010 = 365,
379
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeGetEvent_v11010 = 366,
380
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeSetEvent_v11010 = 367,
381
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventRecordNodeSetEvent_v11010 = 368,
382
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventWaitNodeSetEvent_v11010 = 369,
383
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_v11010 = 370,
384
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_ptsz_v11010 = 371,
385
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetDefaultMemPool_v11020 = 372,
386
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_v11020 = 373,
387
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_ptsz_v11020 = 374,
388
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_v11020 = 375,
389
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_ptsz_v11020 = 376,
390
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolTrimTo_v11020 = 377,
391
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAttribute_v11020 = 378,
392
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAttribute_v11020 = 379,
393
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAccess_v11020 = 380,
394
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetPlane_v11020 = 381,
395
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAccess_v11020 = 382,
396
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolCreate_v11020 = 383,
397
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolDestroy_v11020 = 384,
398
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetMemPool_v11020 = 385,
399
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetMemPool_v11020 = 386,
400
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportToShareableHandle_v11020 = 387,
401
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportFromShareableHandle_v11020 = 388,
402
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportPointer_v11020 = 389,
403
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportPointer_v11020 = 390,
404
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_v11020 = 391,
405
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_ptsz_v11020 = 392,
406
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_v11020 = 393,
407
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020 = 394,
408
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_v11020 = 395,
409
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020 = 396,
410
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresSignalNode_v11020 = 397,
411
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeGetParams_v11020 = 398,
412
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeSetParams_v11020 = 399,
413
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresWaitNode_v11020 = 400,
414
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeGetParams_v11020 = 401,
415
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeSetParams_v11020 = 402,
416
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020 = 403,
417
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020 = 404,
418
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceFlushGPUDirectRDMAWrites_v11030 = 405,
419
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_v11030 = 406,
420
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_ptsz_v11030 = 407,
421
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphDebugDotPrint_v11030 = 408,
422
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_v11030 = 409,
423
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_ptsz_v11030 = 410,
424
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v11030 = 411,
425
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_ptsz_v11030 = 412,
426
+ CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectCreate_v11030 = 413,
427
+ CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRetain_v11030 = 414,
428
+ CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRelease_v11030 = 415,
429
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphRetainUserObject_v11030 = 416,
430
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphReleaseUserObject_v11030 = 417,
431
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithFlags_v11040 = 418,
432
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemAllocNode_v11040 = 419,
433
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemAllocNodeGetParams_v11040 = 420,
434
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemFreeNode_v11040 = 421,
435
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemFreeNodeGetParams_v11040 = 422,
436
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGraphMemTrim_v11040 = 423,
437
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetGraphMemAttribute_v11040 = 424,
438
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetGraphMemAttribute_v11040 = 425,
439
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetEnabled_v11060 = 426,
440
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetEnabled_v11060 = 427,
441
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetMemoryRequirements_v11060 = 428,
442
+ CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetMemoryRequirements_v11060 = 429,
443
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060 = 430,
444
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060 = 431,
445
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxPotentialClusterSize_v11070 = 432,
446
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveClusters_v11070 = 433,
447
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v2_v11080 = 434,
448
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v2_v11080 = 435,
449
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithParams_v12000 = 436,
450
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithParams_ptsz_v12000 = 437,
451
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecGetFlags_v12000 = 438,
452
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetKernel_v12000 = 439,
453
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v2_v12000 = 440,
454
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetId_v12000 = 441,
455
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetId_ptsz_v12000 = 442,
456
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v12000 = 443,
457
+ CUPTI_RUNTIME_TRACE_CBID_cudaInitDevice_v12000 = 444,
458
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddNode_v12020 = 445,
459
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetParams_v12020 = 446,
460
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecNodeSetParams_v12020 = 447,
461
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v2_v12020 = 448,
462
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v2_v12020 = 449,
463
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v2_ptsz_v12020 = 450,
464
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetName_v12030 = 451,
465
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCaptureToGraph_v12030 = 452,
466
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCaptureToGraph_ptsz_v12030 = 453,
467
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphConditionalHandleCreate_v12030 = 454,
468
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v2_v12030 = 455,
469
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v2_v12030 = 456,
470
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v2_v12030 = 457,
471
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v2_v12030 = 458,
472
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v2_v12030 = 459,
473
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddNode_v2_v12030 = 460,
474
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v3_v12030 = 461,
475
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v3_ptsz_v12030 = 462,
476
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v2_v12030 = 463,
477
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v2_ptsz_v12030 = 464,
478
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceRegisterAsyncNotification_v12040 = 465,
479
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceUnregisterAsyncNotification_v12040 = 466,
480
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetParamInfo_v12040 = 467,
481
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPointByVersion_v12050 = 468,
482
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPointByVersion_ptsz_v12050 = 469,
483
+ CUPTI_RUNTIME_TRACE_CBID_cuda470_v12060 = 470,
484
+ CUPTI_RUNTIME_TRACE_CBID_cuda471_v12060 = 471,
485
+ CUPTI_RUNTIME_TRACE_CBID_cuda472_v12060 = 472,
486
+ CUPTI_RUNTIME_TRACE_CBID_cuda473_v12060 = 473,
487
+ CUPTI_RUNTIME_TRACE_CBID_cuda474_v12060 = 474,
488
+ CUPTI_RUNTIME_TRACE_CBID_cuda475_v12060 = 475,
489
+ CUPTI_RUNTIME_TRACE_CBID_cuda476_v12060 = 476,
490
+ CUPTI_RUNTIME_TRACE_CBID_cuda477_v12060 = 477,
491
+ CUPTI_RUNTIME_TRACE_CBID_cuda478_v12060 = 478,
492
+ CUPTI_RUNTIME_TRACE_CBID_cuda479_v12060 = 479,
493
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetDevice_v12080 = 480,
494
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetDevice_ptsz_v12080 = 481,
495
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyBatchAsync_v12080 = 482,
496
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyBatchAsync_ptsz_v12080 = 483,
497
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DBatchAsync_v12080 = 484,
498
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DBatchAsync_ptsz_v12080 = 485,
499
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v2_v12080 = 486,
500
+ CUPTI_RUNTIME_TRACE_CBID_SIZE = 487,
501
+ CUPTI_RUNTIME_TRACE_CBID_FORCE_INT = 0x7fffffff
502
+ } CUpti_runtime_api_trace_cbid;
503
+
504
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_sass_metrics.h ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_SASS_METRICS_H_)
51
+ #define _CUPTI_SASS_METRICS_H_
52
+
53
+ #include <cuda.h>
54
+ #include <cupti_result.h>
55
+ #include <cupti_profiler_target.h>
56
+
57
+ #ifdef __cplusplus
58
+ extern "C" {
59
+ #endif
60
+
61
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
62
+ #pragma GCC visibility push(default)
63
+ #endif
64
+
65
+ /**
66
+ * \defgroup CUPTI_SASS_METRICS_API CUPTI SASS Metrics API
67
+ * Functions, types, and enums that implement the CUPTI SASS Metrics API.
68
+ * @{
69
+ */
70
+
71
+ typedef enum
72
+ {
73
+ /// SASS metric data will be collected at GPU level.
74
+ /// In CUpti_SassMetricsGetDataProperties_Params struct the numOfInstances will be equal to 1
75
+ CUPTI_SASS_METRICS_OUTPUT_GRANULARITY_GPU = 0,
76
+
77
+ /// SASS metric data will be collected at SM level
78
+ /// In CUpti_SassMetricsGetDataProperties_Params struct the numOfInstances will be equal to number of SMs in the GPU
79
+ CUPTI_SASS_METRICS_OUTPUT_GRANULARITY_SM = 1,
80
+
81
+ /// SASS metric data will be collected at SM sub-partition level
82
+ /// In CUpti_SassMetricsGetDataProperties_Params struct the numOfInstances will be equal to number of SM sub-partitions in the GPU
83
+ CUPTI_SASS_METRICS_OUTPUT_GRANULARITY_SMSP = 2,
84
+
85
+ CUPTI_SASS_METRICS_OUTPUT_GRANULARITY_INVALID
86
+ } CUpti_SassMetrics_OutputGranularity;
87
+
88
+ typedef struct CUpti_SassMetrics_MetricDetails
89
+ {
90
+ /// unique ID for the SASS metric
91
+ uint64_t metricId;
92
+ /// metric name
93
+ const char* pMetricName;
94
+ /// metric description
95
+ const char* pMetricDescription;
96
+ } CUpti_SassMetrics_MetricDetails;
97
+
98
+ /**
99
+ * \brief Params for cuptiSassMetricsGetNumOfMetrics
100
+ */
101
+ typedef struct CUpti_SassMetrics_GetNumOfMetrics_Params
102
+ {
103
+ /// [in] should be equal to CUpti_SassMetrics_GetNumOfMetrics_Params_STRUCT_SIZE
104
+ size_t structSize;
105
+ /// [in] assign to NULL
106
+ void* pPriv;
107
+ /// [in] chip name for which metrics will be queried
108
+ const char* pChipName;
109
+ /// [out] number of metrics supported for the queried chip
110
+ size_t numOfMetrics;
111
+ } CUpti_SassMetrics_GetNumOfMetrics_Params;
112
+
113
+ #define CUpti_SassMetrics_GetNumOfMetrics_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetrics_GetNumOfMetrics_Params, numOfMetrics)
114
+
115
+ /**
116
+ * \brief Get the number of supported SASS metrics for the chip.
117
+ *
118
+ * \param pParams A pointer to \ref CUpti_SassMetrics_GetNumOfMetrics_Params
119
+ *
120
+ * \retval CUPTI_SUCCESS
121
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
122
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric collection
123
+ */
124
+ CUptiResult CUPTIAPI cuptiSassMetricsGetNumOfMetrics(CUpti_SassMetrics_GetNumOfMetrics_Params* pParams);
125
+
126
+ /**
127
+ * \brief Params for cuptiSassMetricsGetMetrics
128
+ */
129
+ typedef struct CUpti_SassMetrics_GetMetrics_Params
130
+ {
131
+ /// [in] should be equal to CUpti_SassMetrics_GetMetrics_Params_STRUCT_SIZE
132
+ size_t structSize;
133
+ /// [in] assign to NULL
134
+ void* pPriv;
135
+ /// [in] chip name for which metrics will be queried
136
+ const char* pChipName;
137
+ /// [in] number of metrics supported for the queried chip (can be queried using cuptiSassMetricsGetNumOfMetrics())
138
+ size_t numOfMetrics;
139
+ /// [out] list of metrics supported for queried chip
140
+ CUpti_SassMetrics_MetricDetails* pMetricsList;
141
+ } CUpti_SassMetrics_GetMetrics_Params;
142
+ #define CUpti_SassMetrics_GetMetrics_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetrics_GetMetrics_Params, pMetricsList)
143
+
144
+ /**
145
+ * \brief Get the list of all supported SASS metrics for the chip.
146
+ *
147
+ * \param pParams A pointer to \ref CUpti_SassMetrics_GetMetrics_Params
148
+ *
149
+ * \retval CUPTI_SUCCESS
150
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
151
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric collection
152
+ */
153
+ CUptiResult CUPTIAPI cuptiSassMetricsGetMetrics(CUpti_SassMetrics_GetMetrics_Params* pParams);
154
+
155
+ /**
156
+ * \brief Params for cuptiSassMetricsGetProperties
157
+ */
158
+ typedef struct CUpti_SassMetrics_GetProperties_Params
159
+ {
160
+ /// [in] should be equal to CUpti_SassMetrics_GetProperties_Params_STRUCT_SIZE
161
+ size_t structSize;
162
+ /// [in] assign to NULL
163
+ void* pPriv;
164
+ /// [in] chip name for which metric will be queried
165
+ const char* pChipName;
166
+ /// [in] metric name
167
+ const char* pMetricName;
168
+ /// [out] returns the metric ID and the metric description
169
+ CUpti_SassMetrics_MetricDetails metric;
170
+ } CUpti_SassMetrics_GetProperties_Params;
171
+ #define CUpti_SassMetrics_GetProperties_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetrics_GetProperties_Params, metric)
172
+
173
+ /**
174
+ * \brief Get metric properties for the queried metric.
175
+ * For a given metric the results will be put in CUpti_SassMetrics_MetricDetails which
176
+ * stores metric ID, description of the metric.
177
+ *
178
+ * \param pParams A pointer to \ref CUpti_SassMetrics_GetProperties_Params
179
+ *
180
+ * \retval CUPTI_SUCCESS
181
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
182
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection
183
+ */
184
+ CUptiResult CUPTIAPI cuptiSassMetricsGetProperties(CUpti_SassMetrics_GetProperties_Params *pParams);
185
+
186
+ typedef struct CUpti_SassMetrics_Config
187
+ {
188
+ /// [in] unique id for the SASS metric, can be queried using cuptiSassMetricsGetProperties()
189
+ uint64_t metricId;
190
+ /// [in] CUpti_SassMetrics_OutputGranularity
191
+ uint8_t outputGranularity;
192
+ } CUpti_SassMetrics_Config;
193
+
194
+ /**
195
+ * \brief Params for cuptiSassMetricsSetConfig
196
+ */
197
+ typedef struct CUpti_SassMetricsSetConfig_Params
198
+ {
199
+ /// [in] equal to CUpti_SassMetricsSetConfig_Params_STRUCT_SIZE
200
+ size_t structSize;
201
+ /// [in] assign to NULL
202
+ void* pPriv;
203
+ /// [in] num of metric configs, will be equal to number of metrics queried
204
+ size_t numOfMetricConfig;
205
+ /// [in] list of metric config generated for given sass metrics
206
+ CUpti_SassMetrics_Config* pConfigs;
207
+ /// [in] device index for which config will be set, user can call this once for
208
+ /// the device on which the SASS metric data will be collected
209
+ uint32_t deviceIndex;
210
+ } CUpti_SassMetricsSetConfig_Params;
211
+ #define CUpti_SassMetricsSetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetricsSetConfig_Params, deviceIndex)
212
+
213
+ /**
214
+ * \brief Set config for the SASS metric data collection for a device.
215
+ * User needs to call this API before calling any of the SASS metric data collection APIs.
216
+ * Each set config API call needs to be followed by a cuptiSassMetricsUnsetConfig() API call
217
+ * before calling the cuptiSassMetricsSetConfig() API again for the same device.
218
+ *
219
+ * \param pParams A pointer to \ref CUpti_SassMetricsSetConfig_Params
220
+ *
221
+ * \retval CUPTI_SUCCESS
222
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
223
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if any cuda context has not been created prior to this API call
224
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this is called multiple times for the device without calling unset config API
225
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection
226
+ */
227
+ CUptiResult CUPTIAPI cuptiSassMetricsSetConfig(CUpti_SassMetricsSetConfig_Params *pParams);
228
+
229
+ /**
230
+ * \brief Params for cuptiSassMetricsUnsetConfig
231
+ */
232
+ typedef struct CUpti_SassMetricsUnsetConfig_Params
233
+ {
234
+ /// [in] equal to CUpti_SassMetricsUnsetConfig_Params_STRUCT_SIZE
235
+ size_t structSize;
236
+ /// [in] assign to NULL
237
+ void* pPriv;
238
+ /// [in] device index for which SASS metric data collection config will get reset, user needs to call this API for
239
+ /// all the devices on which the SASS metric data collection has been configured.
240
+ uint32_t deviceIndex;
241
+ } CUpti_SassMetricsUnsetConfig_Params;
242
+ #define CUpti_SassMetricsUnsetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetricsUnsetConfig_Params, deviceIndex)
243
+
244
+ /**
245
+ * \brief Unset config API will reset the SASS metric data collection configuration for the device.
246
+ * Once this API is called, CUPTI will deallocate all the memory allocated and remove all
247
+ * the configuration for SASS metric data collection. User can only call this API for a device where
248
+ * cuptiSassMetricsSetConfig() API has been called earlier for the device.
249
+ *
250
+ * \param pParams A pointer to \ref CUpti_SassMetricsUnsetConfig_Params
251
+ *
252
+ * \retval CUPTI_SUCCESS
253
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
254
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if any cuda context has not been created prior to this API call
255
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this is called multiple times for the device without calling set config API
256
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection
257
+ */
258
+ CUptiResult CUPTIAPI cuptiSassMetricsUnsetConfig(CUpti_SassMetricsUnsetConfig_Params *pParams);
259
+
260
+ /**
261
+ * \brief Params for cuptiSassMetricsEnable
262
+ */
263
+ typedef struct CUpti_SassMetricsEnable_Params
264
+ {
265
+ /// [in] equal to CUpti_SassMetricsEnable_Params_STRUCT_SIZE
266
+ size_t structSize;
267
+ /// [in] assign to NULL
268
+ void* pPriv;
269
+ /// [in] CUDA context on which SASS metric data collection will be enabled.
270
+ /// If set to NULL, the default context will be considered for SASS metric data collection.
271
+ CUcontext ctx;
272
+ /// [in] if false, all the functions will be patched regardless of their execution with cuptiSassMetricsEnable() API call.
273
+ /// when this parameter is set to true, metric data collection for the function will be done at the very first execution in the enable/disable
274
+ /// range.
275
+ uint8_t enableLazyPatching;
276
+ } CUpti_SassMetricsEnable_Params;
277
+ #define CUpti_SassMetricsEnable_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetricsEnable_Params, enableLazyPatching)
278
+
279
+ /**
280
+ * \brief Sass metric data collection enable API will mark the start of a range, between which kernel
281
+ * will be profiled for SASS metrics.
282
+ *
283
+ * \param pParams A pointer to \ref CUpti_SassMetricsEnable_Params
284
+ *
285
+ * \retval CUPTI_SUCCESS
286
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
287
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection
288
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if any cuda context has not been created prior to this API call
289
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called multiple times for a cuda context without calling
290
+ * cuptiSassMetricsDisable() API or called before cuptiSassMetricsSetConfig() API call.
291
+ */
292
+ CUptiResult CUPTIAPI cuptiSassMetricsEnable(CUpti_SassMetricsEnable_Params* pParams);
293
+
294
+ /**
295
+ * \brief Params for cuptiSassMetricsDisable
296
+ */
297
+ typedef struct CUpti_SassMetricsDisable_Params
298
+ {
299
+ /// [in] equal to CUpti_SassMetricsDisable_Params_STRUCT_SIZE
300
+ size_t structSize;
301
+ /// [in] assign to NULL
302
+ void* pPriv;
303
+ /// [in] CUDA context on which SASS metric data collection will be disabled.
304
+ /// If set to NULL, the default context will be considered for SASS metric data collection.
305
+ CUcontext ctx;
306
+ /// [out] Num of dropped SASS records will be equal to numOfPatchedInstructions * numOfInstances.
307
+ /// Number of dropped records will be zero when data is flushed prior to calling the disable API.
308
+ size_t numOfDroppedRecords;
309
+ } CUpti_SassMetricsDisable_Params;
310
+ #define CUpti_SassMetricsDisable_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetricsDisable_Params, numOfDroppedRecords)
311
+
312
+ /**
313
+ * \brief SASS metric data collection disable API will mark the end of a range, any kernel launched after this
314
+ * API call will not be profiled for the SASS metrics.
315
+ *
316
+ * \param pParams A pointer to \ref CUpti_SassMetricsDisable_Params
317
+ *
318
+ * \retval CUPTI_SUCCESS
319
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
320
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection
321
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if any cuda context has not been created prior to this API call
322
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called multiple times for a cuda context without calling
323
+ * cuptiSassMetricsEnable() API or called before cuptiSassMetricsSetConfig() API call.
324
+ */
325
+ CUptiResult CUPTIAPI cuptiSassMetricsDisable(CUpti_SassMetricsDisable_Params* pParams);
326
+
327
+ /**
328
+ * \brief Params for cuptiSassMetricsGetDataProperties
329
+ */
330
+ typedef struct CUpti_SassMetricsGetDataProperties_Params
331
+ {
332
+ /// [in] equal to CUpti_SassMetricsGetDataProperties_Params_STRUCT_SIZE
333
+ size_t structSize;
334
+ /// [in] assign to NULL
335
+ void* pPriv;
336
+ /// [in] CUDA context on which SASS metric data collection was enabled.
337
+ /// If set to NULL, the default context will be considered for SASS metric data collection.
338
+ CUcontext ctx;
339
+ /// [out] total number of SASS records that have been collected
340
+ size_t numOfPatchedInstructionRecords;
341
+ /// [out] number of instances for each metric value per instruction.
342
+ /// This will depend on CUpti_SassMetrics_OutputGranularity level set for the metric config.
343
+ size_t numOfInstances;
344
+ } CUpti_SassMetricsGetDataProperties_Params;
345
+
346
+ #define CUpti_SassMetricsGetDataProperties_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetricsGetDataProperties_Params, numOfInstances)
347
+ /**
348
+ * \brief SASS metric data properties API will give the data regarding number of instances of a metric
349
+ * value and number of SASS instruction data has been collected. The number of instances of a metric
350
+ * will vary as per user set the output granularity level with CUpti_SassMetrics_OutputGranularity value.
351
+ * User needs to allocate memory for retrieving the SASS data using cuptiSassMetricsFlushData() API.
352
+ *
353
+ * \param pParams A pointer to \ref CUpti_SassMetricsGetDataProperties_Params
354
+ *
355
+ * \retval CUPTI_SUCCESS
356
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
357
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection
358
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called outside the enable/disable range.
359
+ */
360
+ CUptiResult CUPTIAPI cuptiSassMetricsGetDataProperties(CUpti_SassMetricsGetDataProperties_Params* pParams);
361
+
362
+ typedef struct CUpti_SassMetrics_InstanceValue
363
+ {
364
+ // unique id of the metric
365
+ uint64_t metricId;
366
+ // metric value
367
+ uint64_t value;
368
+ } CUpti_SassMetrics_InstanceValue;
369
+ #define CUpti_SassMetrics_InstanceValue_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetrics_InstanceValue, value)
370
+
371
+ typedef struct CUpti_SassMetrics_Data
372
+ {
373
+ /// [in] equal to CUpti_SassMetricsFlushData_Params_STRUCT_SIZE
374
+ size_t structSize;
375
+ /// [in] assign to NULL
376
+ void* pPriv;
377
+ /// [out] Unique cubin id
378
+ uint32_t cubinCrc;
379
+ /// [out] function's unique symbol index in the module.
380
+ uint32_t functionIndex;
381
+ /// [out] The function name
382
+ const char* functionName;
383
+ /// [out] pc offset for the function in a module
384
+ uint32_t pcOffset;
385
+ /// [out] array of size equal to number of instances per metric, which contains the metric ID and metric value.
386
+ CUpti_SassMetrics_InstanceValue* pInstanceValues;
387
+ } CUpti_SassMetrics_Data;
388
+
389
+ /**
390
+ * \brief Params for cuptiSassMetricsFlushData
391
+ */
392
+ typedef struct CUpti_SassMetricsFlushData_Params
393
+ {
394
+ /// [in] equal to CUpti_SassMetricsFlushData_Params_STRUCT_SIZE
395
+ size_t structSize;
396
+ /// [in] assign to NULL
397
+ void* pPriv;
398
+ /// [in] CUDA context on which SASS metric data collection was enabled.
399
+ /// If set to NULL, the default context will be considered for SASS metric data collection.
400
+ CUcontext ctx;
401
+ /// [in] number of patched instruction records to be retrieved, user can call cuptiSassMetricsGetDataProperties()
402
+ /// for getting total number of records available.
403
+ size_t numOfPatchedInstructionRecords;
404
+ /// [in] number of patched instruction record instances for a metric, user can call cuptiSassMetricsGetDataProperties()
405
+ /// for getting total number of instances for each record per metric available.
406
+ size_t numOfInstances;
407
+ /// [out]
408
+ CUpti_SassMetrics_Data* pMetricsData;
409
+ } CUpti_SassMetricsFlushData_Params;
410
+ #define CUpti_SassMetricsFlushData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_SassMetricsFlushData_Params, numOfInstances)
411
+
412
+ /**
413
+ * \brief Flush SASS metrics data from CUPTI internal buffer to the user buffer.
414
+ * User needs to allocate the buffer for retrieving the data. The number of records collected
415
+ * can be queried using the API cuptiSassMetricsGetDataProperties().
416
+ *
417
+ * \param pParams A pointer to \ref CUpti_SassMetricsFlushData_Params
418
+ *
419
+ * \retval CUPTI_SUCCESS
420
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
421
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device doesn't support SASS metric data collection.
422
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called outside the enable/disable range.
423
+ */
424
+ CUptiResult CUPTIAPI cuptiSassMetricsFlushData(CUpti_SassMetricsFlushData_Params* pParams);
425
+
426
+ /** @} */ /* END CUPTI_SASS_METRICS_API */
427
+
428
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
429
+ #pragma GCC visibility pop
430
+ #endif
431
+
432
+ #ifdef __cplusplus
433
+ } /* extern "C" */
434
+ #endif
435
+
436
+ #endif // _CUPTI_SASS_METRICS_H_
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_target.h ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #if !defined(_CUPTI_TARGET_H_)
2
+ #define _CUPTI_TARGET_H_
3
+
4
+ /*
5
+ CUPTI profiler target API's
6
+ This file contains the CUPTI profiling API's.
7
+ */
8
+ #include <cupti_result.h>
9
+ #include <stddef.h>
10
+ #include <stdint.h>
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
17
+ #pragma GCC visibility push(default)
18
+ #endif
19
+
20
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
21
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
22
+ #endif
23
+
24
+ typedef struct CUpti_Device_GetChipName_Params
25
+ {
26
+ size_t structSize; //!< [in]
27
+ void* pPriv; //!< [in] assign to NULL
28
+
29
+ size_t deviceIndex; //!< [in]
30
+ const char* pChipName; //!< [out]
31
+ } CUpti_Device_GetChipName_Params;
32
+
33
+ #define CUpti_Device_GetChipName_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Device_GetChipName_Params, pChipName)
34
+ CUptiResult CUPTIAPI cuptiDeviceGetChipName(CUpti_Device_GetChipName_Params *pParams);
35
+
36
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
37
+ #pragma GCC visibility pop
38
+ #endif
39
+
40
+ #ifdef __cplusplus
41
+ } /* extern "C" */
42
+ #endif
43
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/cupti_version.h ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2024 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_VERSION_H_)
51
+ #define _CUPTI_VERSION_H_
52
+
53
+ #include <cuda_stdint.h>
54
+ #include <cupti_result.h>
55
+
56
+ #ifndef CUPTIAPI
57
+ #ifdef _WIN32
58
+ #define CUPTIAPI __stdcall
59
+ #else
60
+ #define CUPTIAPI
61
+ #endif
62
+ #endif
63
+
64
+ #if defined(__cplusplus)
65
+ extern "C" {
66
+ #endif
67
+
68
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
69
+ #pragma GCC visibility push(default)
70
+ #endif
71
+
72
+ /**
73
+ * \defgroup CUPTI_VERSION_API CUPTI Version
74
+ * Function and macro to determine the CUPTI version.
75
+ * @{
76
+ */
77
+
78
+ /**
79
+ * \brief The API version for this implementation of CUPTI.
80
+ *
81
+ * The API version for this implementation of CUPTI. This define along
82
+ * with \ref cuptiGetVersion can be used to dynamically detect if the
83
+ * version of CUPTI compiled against matches the version of the loaded
84
+ * CUPTI library.
85
+ *
86
+ * v1 : CUDAToolsSDK 4.0
87
+ * v2 : CUDAToolsSDK 4.1
88
+ * v3 : CUDA Toolkit 5.0
89
+ * v4 : CUDA Toolkit 5.5
90
+ * v5 : CUDA Toolkit 6.0
91
+ * v6 : CUDA Toolkit 6.5
92
+ * v7 : CUDA Toolkit 6.5(with sm_52 support)
93
+ * v8 : CUDA Toolkit 7.0
94
+ * v9 : CUDA Toolkit 8.0
95
+ * v10 : CUDA Toolkit 9.0
96
+ * v11 : CUDA Toolkit 9.1
97
+ * v12 : CUDA Toolkit 10.0, 10.1 and 10.2
98
+ * v13 : CUDA Toolkit 11.0
99
+ * v14 : CUDA Toolkit 11.1
100
+ * v15 : CUDA Toolkit 11.2, 11.3 and 11.4
101
+ * v16 : CUDA Toolkit 11.5
102
+ * v17 : CUDA Toolkit 11.6
103
+ * v18 : CUDA Toolkit 11.8
104
+ * v19 : CUDA Toolkit 12.0
105
+ * v20 : CUDA Toolkit 12.2
106
+ * v21 : CUDA Toolkit 12.3
107
+ * v22 : CUDA Toolkit 12.4
108
+ * v23 : CUDA Toolkit 12.5
109
+ * v24 : CUDA Toolkit 12.6
110
+ * v26 : CUDA Toolkit 12.8
111
+ */
112
+ #define CUPTI_API_VERSION 26
113
+
114
+ /**
115
+ * \brief Get the CUPTI API version.
116
+ *
117
+ * Return the API version in \p *version.
118
+ *
119
+ * \param version Returns the version
120
+ *
121
+ * \retval CUPTI_SUCCESS on success
122
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p version is NULL
123
+ * \sa CUPTI_API_VERSION
124
+ */
125
+ CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version);
126
+
127
+ /** @} */ /* END CUPTI_VERSION_API */
128
+
129
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
130
+ #pragma GCC visibility pop
131
+ #endif
132
+
133
+ #if defined(__cplusplus)
134
+ }
135
+ #endif
136
+
137
+ #endif /*_CUPTI_VERSION_H_*/
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cudaGL_meta.h ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // Dependent includes
4
+ #ifdef __APPLE__
5
+ #include <OpenGL/gl.h>
6
+ #else
7
+ #include <GL/gl.h>
8
+ #endif
9
+
10
+ // CUDA public interface, for type definitions and cu* function prototypes
11
+ #include "cudaGL.h"
12
+
13
+
14
+ // *************************************************************************
15
+ // Definitions of structs to hold parameters for each function
16
+ // *************************************************************************
17
+
18
+ typedef struct cuGraphicsGLRegisterBuffer_params_st {
19
+ CUgraphicsResource *pCudaResource;
20
+ GLuint buffer;
21
+ unsigned int Flags;
22
+ } cuGraphicsGLRegisterBuffer_params;
23
+
24
+ typedef struct cuGraphicsGLRegisterImage_params_st {
25
+ CUgraphicsResource *pCudaResource;
26
+ GLuint image;
27
+ GLenum target;
28
+ unsigned int Flags;
29
+ } cuGraphicsGLRegisterImage_params;
30
+
31
+ typedef struct cuGLGetDevices_v2_params_st {
32
+ unsigned int *pCudaDeviceCount;
33
+ CUdevice *pCudaDevices;
34
+ unsigned int cudaDeviceCount;
35
+ CUGLDeviceList deviceList;
36
+ } cuGLGetDevices_v2_params;
37
+
38
+ typedef struct cuGLCtxCreate_v2_params_st {
39
+ CUcontext *pCtx;
40
+ unsigned int Flags;
41
+ CUdevice device;
42
+ } cuGLCtxCreate_v2_params;
43
+
44
+ typedef struct cuGLRegisterBufferObject_params_st {
45
+ GLuint buffer;
46
+ } cuGLRegisterBufferObject_params;
47
+
48
+ typedef struct cuGLMapBufferObject_v2_ptds_params_st {
49
+ CUdeviceptr *dptr;
50
+ size_t *size;
51
+ GLuint buffer;
52
+ } cuGLMapBufferObject_v2_ptds_params;
53
+
54
+ typedef struct cuGLUnmapBufferObject_params_st {
55
+ GLuint buffer;
56
+ } cuGLUnmapBufferObject_params;
57
+
58
+ typedef struct cuGLUnregisterBufferObject_params_st {
59
+ GLuint buffer;
60
+ } cuGLUnregisterBufferObject_params;
61
+
62
+ typedef struct cuGLSetBufferObjectMapFlags_params_st {
63
+ GLuint buffer;
64
+ unsigned int Flags;
65
+ } cuGLSetBufferObjectMapFlags_params;
66
+
67
+ typedef struct cuGLMapBufferObjectAsync_v2_ptsz_params_st {
68
+ CUdeviceptr *dptr;
69
+ size_t *size;
70
+ GLuint buffer;
71
+ CUstream hStream;
72
+ } cuGLMapBufferObjectAsync_v2_ptsz_params;
73
+
74
+ typedef struct cuGLUnmapBufferObjectAsync_params_st {
75
+ GLuint buffer;
76
+ CUstream hStream;
77
+ } cuGLUnmapBufferObjectAsync_params;
78
+
79
+ typedef struct cuGLGetDevices_params_st {
80
+ unsigned int *pCudaDeviceCount;
81
+ CUdevice *pCudaDevices;
82
+ unsigned int cudaDeviceCount;
83
+ CUGLDeviceList deviceList;
84
+ } cuGLGetDevices_params;
85
+
86
+ typedef struct cuGLMapBufferObject_v2_params_st {
87
+ CUdeviceptr *dptr;
88
+ size_t *size;
89
+ GLuint buffer;
90
+ } cuGLMapBufferObject_v2_params;
91
+
92
+ typedef struct cuGLMapBufferObjectAsync_v2_params_st {
93
+ CUdeviceptr *dptr;
94
+ size_t *size;
95
+ GLuint buffer;
96
+ CUstream hStream;
97
+ } cuGLMapBufferObjectAsync_v2_params;
98
+
99
+ typedef struct cuGLCtxCreate_params_st {
100
+ CUcontext *pCtx;
101
+ unsigned int Flags;
102
+ CUdevice device;
103
+ } cuGLCtxCreate_params;
104
+
105
+ typedef struct cuGLMapBufferObject_params_st {
106
+ CUdeviceptr_v1 *dptr;
107
+ unsigned int *size;
108
+ GLuint buffer;
109
+ } cuGLMapBufferObject_params;
110
+
111
+ typedef struct cuGLMapBufferObjectAsync_params_st {
112
+ CUdeviceptr_v1 *dptr;
113
+ unsigned int *size;
114
+ GLuint buffer;
115
+ CUstream hStream;
116
+ } cuGLMapBufferObjectAsync_params;
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cudaVDPAU_meta.h ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // Dependent includes
4
+ #include <vdpau/vdpau.h>
5
+
6
+ // CUDA public interface, for type definitions and cu* function prototypes
7
+ #include "cudaVDPAU.h"
8
+
9
+
10
+ // *************************************************************************
11
+ // Definitions of structs to hold parameters for each function
12
+ // *************************************************************************
13
+
14
+ typedef struct cuVDPAUGetDevice_params_st {
15
+ CUdevice *pDevice;
16
+ VdpDevice vdpDevice;
17
+ VdpGetProcAddress *vdpGetProcAddress;
18
+ } cuVDPAUGetDevice_params;
19
+
20
+ typedef struct cuVDPAUCtxCreate_v2_params_st {
21
+ CUcontext *pCtx;
22
+ unsigned int flags;
23
+ CUdevice device;
24
+ VdpDevice vdpDevice;
25
+ VdpGetProcAddress *vdpGetProcAddress;
26
+ } cuVDPAUCtxCreate_v2_params;
27
+
28
+ typedef struct cuGraphicsVDPAURegisterVideoSurface_params_st {
29
+ CUgraphicsResource *pCudaResource;
30
+ VdpVideoSurface vdpSurface;
31
+ unsigned int flags;
32
+ } cuGraphicsVDPAURegisterVideoSurface_params;
33
+
34
+ typedef struct cuGraphicsVDPAURegisterOutputSurface_params_st {
35
+ CUgraphicsResource *pCudaResource;
36
+ VdpOutputSurface vdpSurface;
37
+ unsigned int flags;
38
+ } cuGraphicsVDPAURegisterOutputSurface_params;
39
+
40
+ typedef struct cuVDPAUCtxCreate_params_st {
41
+ CUcontext *pCtx;
42
+ unsigned int flags;
43
+ CUdevice device;
44
+ VdpDevice vdpDevice;
45
+ VdpGetProcAddress *vdpGetProcAddress;
46
+ } cuVDPAUCtxCreate_params;
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_gl_interop_meta.h ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // CUDA public interface, for type definitions and api function prototypes
4
+ #include "cuda_gl_interop.h"
5
+
6
+ // *************************************************************************
7
+ // Definitions of structs to hold parameters for each function
8
+ // *************************************************************************
9
+
10
+ // Currently used parameter trace structures
11
+ typedef struct cudaGLGetDevices_v4010_params_st {
12
+ unsigned int *pCudaDeviceCount;
13
+ int *pCudaDevices;
14
+ unsigned int cudaDeviceCount;
15
+ enum cudaGLDeviceList deviceList;
16
+ } cudaGLGetDevices_v4010_params;
17
+
18
+ typedef struct cudaGraphicsGLRegisterImage_v3020_params_st {
19
+ struct cudaGraphicsResource **resource;
20
+ GLuint image;
21
+ GLenum target;
22
+ unsigned int flags;
23
+ } cudaGraphicsGLRegisterImage_v3020_params;
24
+
25
+ typedef struct cudaGraphicsGLRegisterBuffer_v3020_params_st {
26
+ struct cudaGraphicsResource **resource;
27
+ GLuint buffer;
28
+ unsigned int flags;
29
+ } cudaGraphicsGLRegisterBuffer_v3020_params;
30
+
31
+ typedef struct cudaGLSetGLDevice_v3020_params_st {
32
+ int device;
33
+ } cudaGLSetGLDevice_v3020_params;
34
+
35
+ typedef struct cudaGLRegisterBufferObject_v3020_params_st {
36
+ GLuint bufObj;
37
+ } cudaGLRegisterBufferObject_v3020_params;
38
+
39
+ typedef struct cudaGLMapBufferObject_v3020_params_st {
40
+ void **devPtr;
41
+ GLuint bufObj;
42
+ } cudaGLMapBufferObject_v3020_params;
43
+
44
+ typedef struct cudaGLUnmapBufferObject_v3020_params_st {
45
+ GLuint bufObj;
46
+ } cudaGLUnmapBufferObject_v3020_params;
47
+
48
+ typedef struct cudaGLUnregisterBufferObject_v3020_params_st {
49
+ GLuint bufObj;
50
+ } cudaGLUnregisterBufferObject_v3020_params;
51
+
52
+ typedef struct cudaGLSetBufferObjectMapFlags_v3020_params_st {
53
+ GLuint bufObj;
54
+ unsigned int flags;
55
+ } cudaGLSetBufferObjectMapFlags_v3020_params;
56
+
57
+ typedef struct cudaGLMapBufferObjectAsync_v3020_params_st {
58
+ void **devPtr;
59
+ GLuint bufObj;
60
+ cudaStream_t stream;
61
+ } cudaGLMapBufferObjectAsync_v3020_params;
62
+
63
+ typedef struct cudaGLUnmapBufferObjectAsync_v3020_params_st {
64
+ GLuint bufObj;
65
+ cudaStream_t stream;
66
+ } cudaGLUnmapBufferObjectAsync_v3020_params;
67
+
68
+ // Parameter trace structures for removed functions
69
+
70
+
71
+ // End of parameter trace structures
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h ADDED
@@ -0,0 +1,2372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // CUDA public interface, for type definitions and api function prototypes
4
+ #include "cuda_runtime_api.h"
5
+
6
+ // *************************************************************************
7
+ // Definitions of structs to hold parameters for each function
8
+ // *************************************************************************
9
+
10
+ // Currently used parameter trace structures
11
+ typedef struct cudaDeviceSetLimit_v3020_params_st {
12
+ enum cudaLimit limit;
13
+ size_t value;
14
+ } cudaDeviceSetLimit_v3020_params;
15
+
16
+ typedef struct cudaDeviceGetLimit_v3020_params_st {
17
+ size_t *pValue;
18
+ enum cudaLimit limit;
19
+ } cudaDeviceGetLimit_v3020_params;
20
+
21
+ typedef struct cudaDeviceGetTexture1DLinearMaxWidth_v11010_params_st {
22
+ size_t *maxWidthInElements;
23
+ const struct cudaChannelFormatDesc *fmtDesc;
24
+ int device;
25
+ } cudaDeviceGetTexture1DLinearMaxWidth_v11010_params;
26
+
27
+ typedef struct cudaDeviceGetCacheConfig_v3020_params_st {
28
+ enum cudaFuncCache *pCacheConfig;
29
+ } cudaDeviceGetCacheConfig_v3020_params;
30
+
31
+ typedef struct cudaDeviceGetStreamPriorityRange_v5050_params_st {
32
+ int *leastPriority;
33
+ int *greatestPriority;
34
+ } cudaDeviceGetStreamPriorityRange_v5050_params;
35
+
36
+ typedef struct cudaDeviceSetCacheConfig_v3020_params_st {
37
+ enum cudaFuncCache cacheConfig;
38
+ } cudaDeviceSetCacheConfig_v3020_params;
39
+
40
+ typedef struct cudaDeviceGetByPCIBusId_v4010_params_st {
41
+ int *device;
42
+ const char *pciBusId;
43
+ } cudaDeviceGetByPCIBusId_v4010_params;
44
+
45
+ typedef struct cudaDeviceGetPCIBusId_v4010_params_st {
46
+ char *pciBusId;
47
+ int len;
48
+ int device;
49
+ } cudaDeviceGetPCIBusId_v4010_params;
50
+
51
+ typedef struct cudaIpcGetEventHandle_v4010_params_st {
52
+ cudaIpcEventHandle_t *handle;
53
+ cudaEvent_t event;
54
+ } cudaIpcGetEventHandle_v4010_params;
55
+
56
+ typedef struct cudaIpcOpenEventHandle_v4010_params_st {
57
+ cudaEvent_t *event;
58
+ cudaIpcEventHandle_t handle;
59
+ } cudaIpcOpenEventHandle_v4010_params;
60
+
61
+ typedef struct cudaIpcGetMemHandle_v4010_params_st {
62
+ cudaIpcMemHandle_t *handle;
63
+ void *devPtr;
64
+ } cudaIpcGetMemHandle_v4010_params;
65
+
66
+ typedef struct cudaIpcOpenMemHandle_v4010_params_st {
67
+ void **devPtr;
68
+ cudaIpcMemHandle_t handle;
69
+ unsigned int flags;
70
+ } cudaIpcOpenMemHandle_v4010_params;
71
+
72
+ typedef struct cudaIpcCloseMemHandle_v4010_params_st {
73
+ void *devPtr;
74
+ } cudaIpcCloseMemHandle_v4010_params;
75
+
76
+ typedef struct cudaDeviceFlushGPUDirectRDMAWrites_v11030_params_st {
77
+ enum cudaFlushGPUDirectRDMAWritesTarget target;
78
+ enum cudaFlushGPUDirectRDMAWritesScope scope;
79
+ } cudaDeviceFlushGPUDirectRDMAWrites_v11030_params;
80
+
81
+ typedef struct cudaDeviceRegisterAsyncNotification_v12040_params_st {
82
+ int device;
83
+ cudaAsyncCallback callbackFunc;
84
+ void *userData;
85
+ cudaAsyncCallbackHandle_t *callback;
86
+ } cudaDeviceRegisterAsyncNotification_v12040_params;
87
+
88
+ typedef struct cudaDeviceUnregisterAsyncNotification_v12040_params_st {
89
+ int device;
90
+ cudaAsyncCallbackHandle_t callback;
91
+ } cudaDeviceUnregisterAsyncNotification_v12040_params;
92
+
93
+ typedef struct cudaDeviceGetSharedMemConfig_v4020_params_st {
94
+ enum cudaSharedMemConfig *pConfig;
95
+ } cudaDeviceGetSharedMemConfig_v4020_params;
96
+
97
+ typedef struct cudaDeviceSetSharedMemConfig_v4020_params_st {
98
+ enum cudaSharedMemConfig config;
99
+ } cudaDeviceSetSharedMemConfig_v4020_params;
100
+
101
+ typedef struct cudaGetErrorName_v6050_params_st {
102
+ cudaError_t error;
103
+ } cudaGetErrorName_v6050_params;
104
+
105
+ typedef struct cudaGetErrorString_v3020_params_st {
106
+ cudaError_t error;
107
+ } cudaGetErrorString_v3020_params;
108
+
109
+ typedef struct cudaGetDeviceCount_v3020_params_st {
110
+ int *count;
111
+ } cudaGetDeviceCount_v3020_params;
112
+
113
+ typedef struct cudaGetDeviceProperties_v2_v12000_params_st {
114
+ struct cudaDeviceProp *prop;
115
+ int device;
116
+ } cudaGetDeviceProperties_v2_v12000_params;
117
+
118
+ typedef struct cudaDeviceGetAttribute_v5000_params_st {
119
+ int *value;
120
+ enum cudaDeviceAttr attr;
121
+ int device;
122
+ } cudaDeviceGetAttribute_v5000_params;
123
+
124
+ typedef struct cudaDeviceGetDefaultMemPool_v11020_params_st {
125
+ cudaMemPool_t *memPool;
126
+ int device;
127
+ } cudaDeviceGetDefaultMemPool_v11020_params;
128
+
129
+ typedef struct cudaDeviceSetMemPool_v11020_params_st {
130
+ int device;
131
+ cudaMemPool_t memPool;
132
+ } cudaDeviceSetMemPool_v11020_params;
133
+
134
+ typedef struct cudaDeviceGetMemPool_v11020_params_st {
135
+ cudaMemPool_t *memPool;
136
+ int device;
137
+ } cudaDeviceGetMemPool_v11020_params;
138
+
139
+ typedef struct cudaDeviceGetNvSciSyncAttributes_v10020_params_st {
140
+ void *nvSciSyncAttrList;
141
+ int device;
142
+ int flags;
143
+ } cudaDeviceGetNvSciSyncAttributes_v10020_params;
144
+
145
+ typedef struct cudaDeviceGetP2PAttribute_v8000_params_st {
146
+ int *value;
147
+ enum cudaDeviceP2PAttr attr;
148
+ int srcDevice;
149
+ int dstDevice;
150
+ } cudaDeviceGetP2PAttribute_v8000_params;
151
+
152
+ typedef struct cudaChooseDevice_v3020_params_st {
153
+ int *device;
154
+ const struct cudaDeviceProp *prop;
155
+ } cudaChooseDevice_v3020_params;
156
+
157
+ typedef struct cudaInitDevice_v12000_params_st {
158
+ int device;
159
+ unsigned int deviceFlags;
160
+ unsigned int flags;
161
+ } cudaInitDevice_v12000_params;
162
+
163
+ typedef struct cudaSetDevice_v3020_params_st {
164
+ int device;
165
+ } cudaSetDevice_v3020_params;
166
+
167
+ typedef struct cudaGetDevice_v3020_params_st {
168
+ int *device;
169
+ } cudaGetDevice_v3020_params;
170
+
171
+ typedef struct cudaSetValidDevices_v3020_params_st {
172
+ int *device_arr;
173
+ int len;
174
+ } cudaSetValidDevices_v3020_params;
175
+
176
+ typedef struct cudaSetDeviceFlags_v3020_params_st {
177
+ unsigned int flags;
178
+ } cudaSetDeviceFlags_v3020_params;
179
+
180
+ typedef struct cudaGetDeviceFlags_v7000_params_st {
181
+ unsigned int *flags;
182
+ } cudaGetDeviceFlags_v7000_params;
183
+
184
+ typedef struct cudaStreamCreate_v3020_params_st {
185
+ cudaStream_t *pStream;
186
+ } cudaStreamCreate_v3020_params;
187
+
188
+ typedef struct cudaStreamCreateWithFlags_v5000_params_st {
189
+ cudaStream_t *pStream;
190
+ unsigned int flags;
191
+ } cudaStreamCreateWithFlags_v5000_params;
192
+
193
+ typedef struct cudaStreamCreateWithPriority_v5050_params_st {
194
+ cudaStream_t *pStream;
195
+ unsigned int flags;
196
+ int priority;
197
+ } cudaStreamCreateWithPriority_v5050_params;
198
+
199
+ typedef struct cudaStreamGetPriority_ptsz_v7000_params_st {
200
+ cudaStream_t hStream;
201
+ int *priority;
202
+ } cudaStreamGetPriority_ptsz_v7000_params;
203
+
204
+ typedef struct cudaStreamGetFlags_ptsz_v7000_params_st {
205
+ cudaStream_t hStream;
206
+ unsigned int *flags;
207
+ } cudaStreamGetFlags_ptsz_v7000_params;
208
+
209
+ typedef struct cudaStreamGetId_ptsz_v12000_params_st {
210
+ cudaStream_t hStream;
211
+ unsigned long long *streamId;
212
+ } cudaStreamGetId_ptsz_v12000_params;
213
+
214
+ typedef struct cudaStreamGetDevice_ptsz_v12080_params_st {
215
+ cudaStream_t hStream;
216
+ int *device;
217
+ } cudaStreamGetDevice_ptsz_v12080_params;
218
+
219
+ typedef struct cudaStreamCopyAttributes_ptsz_v11000_params_st {
220
+ cudaStream_t dst;
221
+ cudaStream_t src;
222
+ } cudaStreamCopyAttributes_ptsz_v11000_params;
223
+
224
+ typedef struct cudaStreamGetAttribute_ptsz_v11000_params_st {
225
+ cudaStream_t hStream;
226
+ cudaStreamAttrID attr;
227
+ cudaStreamAttrValue *value_out;
228
+ } cudaStreamGetAttribute_ptsz_v11000_params;
229
+
230
+ typedef struct cudaStreamSetAttribute_ptsz_v11000_params_st {
231
+ cudaStream_t hStream;
232
+ cudaStreamAttrID attr;
233
+ const cudaStreamAttrValue *value;
234
+ } cudaStreamSetAttribute_ptsz_v11000_params;
235
+
236
+ typedef struct cudaStreamDestroy_v5050_params_st {
237
+ cudaStream_t stream;
238
+ } cudaStreamDestroy_v5050_params;
239
+
240
+ typedef struct cudaStreamWaitEvent_ptsz_v7000_params_st {
241
+ cudaStream_t stream;
242
+ cudaEvent_t event;
243
+ unsigned int flags;
244
+ } cudaStreamWaitEvent_ptsz_v7000_params;
245
+
246
+ typedef struct cudaStreamAddCallback_ptsz_v7000_params_st {
247
+ cudaStream_t stream;
248
+ cudaStreamCallback_t callback;
249
+ void *userData;
250
+ unsigned int flags;
251
+ } cudaStreamAddCallback_ptsz_v7000_params;
252
+
253
+ typedef struct cudaStreamSynchronize_ptsz_v7000_params_st {
254
+ cudaStream_t stream;
255
+ } cudaStreamSynchronize_ptsz_v7000_params;
256
+
257
+ typedef struct cudaStreamQuery_ptsz_v7000_params_st {
258
+ cudaStream_t stream;
259
+ } cudaStreamQuery_ptsz_v7000_params;
260
+
261
+ typedef struct cudaStreamAttachMemAsync_ptsz_v7000_params_st {
262
+ cudaStream_t stream;
263
+ void *devPtr;
264
+ size_t length;
265
+ unsigned int flags;
266
+ } cudaStreamAttachMemAsync_ptsz_v7000_params;
267
+
268
+ typedef struct cudaStreamBeginCapture_ptsz_v10000_params_st {
269
+ cudaStream_t stream;
270
+ enum cudaStreamCaptureMode mode;
271
+ } cudaStreamBeginCapture_ptsz_v10000_params;
272
+
273
+ typedef struct cudaStreamBeginCaptureToGraph_ptsz_v12030_params_st {
274
+ cudaStream_t stream;
275
+ cudaGraph_t graph;
276
+ const cudaGraphNode_t *dependencies;
277
+ const cudaGraphEdgeData *dependencyData;
278
+ size_t numDependencies;
279
+ enum cudaStreamCaptureMode mode;
280
+ } cudaStreamBeginCaptureToGraph_ptsz_v12030_params;
281
+
282
+ typedef struct cudaThreadExchangeStreamCaptureMode_v10010_params_st {
283
+ enum cudaStreamCaptureMode *mode;
284
+ } cudaThreadExchangeStreamCaptureMode_v10010_params;
285
+
286
+ typedef struct cudaStreamEndCapture_ptsz_v10000_params_st {
287
+ cudaStream_t stream;
288
+ cudaGraph_t *pGraph;
289
+ } cudaStreamEndCapture_ptsz_v10000_params;
290
+
291
+ typedef struct cudaStreamIsCapturing_ptsz_v10000_params_st {
292
+ cudaStream_t stream;
293
+ enum cudaStreamCaptureStatus *pCaptureStatus;
294
+ } cudaStreamIsCapturing_ptsz_v10000_params;
295
+
296
+ typedef struct cudaStreamGetCaptureInfo_v2_ptsz_v11030_params_st {
297
+ cudaStream_t stream;
298
+ enum cudaStreamCaptureStatus *captureStatus_out;
299
+ unsigned long long *id_out;
300
+ cudaGraph_t *graph_out;
301
+ const cudaGraphNode_t **dependencies_out;
302
+ size_t *numDependencies_out;
303
+ } cudaStreamGetCaptureInfo_v2_ptsz_v11030_params;
304
+
305
+ typedef struct cudaStreamGetCaptureInfo_v3_ptsz_v12030_params_st {
306
+ cudaStream_t stream;
307
+ enum cudaStreamCaptureStatus *captureStatus_out;
308
+ unsigned long long *id_out;
309
+ cudaGraph_t *graph_out;
310
+ const cudaGraphNode_t **dependencies_out;
311
+ const cudaGraphEdgeData **edgeData_out;
312
+ size_t *numDependencies_out;
313
+ } cudaStreamGetCaptureInfo_v3_ptsz_v12030_params;
314
+
315
+ typedef struct cudaStreamUpdateCaptureDependencies_ptsz_v11030_params_st {
316
+ cudaStream_t stream;
317
+ cudaGraphNode_t *dependencies;
318
+ size_t numDependencies;
319
+ unsigned int flags;
320
+ } cudaStreamUpdateCaptureDependencies_ptsz_v11030_params;
321
+
322
+ typedef struct cudaStreamUpdateCaptureDependencies_v2_ptsz_v12030_params_st {
323
+ cudaStream_t stream;
324
+ cudaGraphNode_t *dependencies;
325
+ const cudaGraphEdgeData *dependencyData;
326
+ size_t numDependencies;
327
+ unsigned int flags;
328
+ } cudaStreamUpdateCaptureDependencies_v2_ptsz_v12030_params;
329
+
330
+ typedef struct cudaEventCreate_v3020_params_st {
331
+ cudaEvent_t *event;
332
+ } cudaEventCreate_v3020_params;
333
+
334
+ typedef struct cudaEventCreateWithFlags_v3020_params_st {
335
+ cudaEvent_t *event;
336
+ unsigned int flags;
337
+ } cudaEventCreateWithFlags_v3020_params;
338
+
339
+ typedef struct cudaEventRecord_ptsz_v7000_params_st {
340
+ cudaEvent_t event;
341
+ cudaStream_t stream;
342
+ } cudaEventRecord_ptsz_v7000_params;
343
+
344
+ typedef struct cudaEventRecordWithFlags_ptsz_v11010_params_st {
345
+ cudaEvent_t event;
346
+ cudaStream_t stream;
347
+ unsigned int flags;
348
+ } cudaEventRecordWithFlags_ptsz_v11010_params;
349
+
350
+ typedef struct cudaEventQuery_v3020_params_st {
351
+ cudaEvent_t event;
352
+ } cudaEventQuery_v3020_params;
353
+
354
+ typedef struct cudaEventSynchronize_v3020_params_st {
355
+ cudaEvent_t event;
356
+ } cudaEventSynchronize_v3020_params;
357
+
358
+ typedef struct cudaEventDestroy_v3020_params_st {
359
+ cudaEvent_t event;
360
+ } cudaEventDestroy_v3020_params;
361
+
362
+ typedef struct cudaEventElapsedTime_v3020_params_st {
363
+ float *ms;
364
+ cudaEvent_t start;
365
+ cudaEvent_t end;
366
+ } cudaEventElapsedTime_v3020_params;
367
+
368
+ typedef struct cudaEventElapsedTime_v2_v12080_params_st {
369
+ float *ms;
370
+ cudaEvent_t start;
371
+ cudaEvent_t end;
372
+ } cudaEventElapsedTime_v2_v12080_params;
373
+
374
+ typedef struct cudaImportExternalMemory_v10000_params_st {
375
+ cudaExternalMemory_t *extMem_out;
376
+ const struct cudaExternalMemoryHandleDesc *memHandleDesc;
377
+ } cudaImportExternalMemory_v10000_params;
378
+
379
+ typedef struct cudaExternalMemoryGetMappedBuffer_v10000_params_st {
380
+ void **devPtr;
381
+ cudaExternalMemory_t extMem;
382
+ const struct cudaExternalMemoryBufferDesc *bufferDesc;
383
+ } cudaExternalMemoryGetMappedBuffer_v10000_params;
384
+
385
+ typedef struct cudaExternalMemoryGetMappedMipmappedArray_v10000_params_st {
386
+ cudaMipmappedArray_t *mipmap;
387
+ cudaExternalMemory_t extMem;
388
+ const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc;
389
+ } cudaExternalMemoryGetMappedMipmappedArray_v10000_params;
390
+
391
+ typedef struct cudaDestroyExternalMemory_v10000_params_st {
392
+ cudaExternalMemory_t extMem;
393
+ } cudaDestroyExternalMemory_v10000_params;
394
+
395
+ typedef struct cudaImportExternalSemaphore_v10000_params_st {
396
+ cudaExternalSemaphore_t *extSem_out;
397
+ const struct cudaExternalSemaphoreHandleDesc *semHandleDesc;
398
+ } cudaImportExternalSemaphore_v10000_params;
399
+
400
+ typedef struct cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
401
+ const cudaExternalSemaphore_t *extSemArray;
402
+ const struct cudaExternalSemaphoreSignalParams *paramsArray;
403
+ unsigned int numExtSems;
404
+ cudaStream_t stream;
405
+ } cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params;
406
+
407
+ typedef struct cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
408
+ const cudaExternalSemaphore_t *extSemArray;
409
+ const struct cudaExternalSemaphoreWaitParams *paramsArray;
410
+ unsigned int numExtSems;
411
+ cudaStream_t stream;
412
+ } cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params;
413
+
414
+ typedef struct cudaDestroyExternalSemaphore_v10000_params_st {
415
+ cudaExternalSemaphore_t extSem;
416
+ } cudaDestroyExternalSemaphore_v10000_params;
417
+
418
+ typedef struct cudaLaunchKernel_ptsz_v7000_params_st {
419
+ const void *func;
420
+ dim3 gridDim;
421
+ dim3 blockDim;
422
+ void **args;
423
+ size_t sharedMem;
424
+ cudaStream_t stream;
425
+ } cudaLaunchKernel_ptsz_v7000_params;
426
+
427
+ typedef struct cudaLaunchKernelExC_ptsz_v11060_params_st {
428
+ const cudaLaunchConfig_t *config;
429
+ const void *func;
430
+ void **args;
431
+ } cudaLaunchKernelExC_ptsz_v11060_params;
432
+
433
+ typedef struct cudaLaunchCooperativeKernel_ptsz_v9000_params_st {
434
+ const void *func;
435
+ dim3 gridDim;
436
+ dim3 blockDim;
437
+ void **args;
438
+ size_t sharedMem;
439
+ cudaStream_t stream;
440
+ } cudaLaunchCooperativeKernel_ptsz_v9000_params;
441
+
442
+ typedef struct cudaLaunchCooperativeKernelMultiDevice_v9000_params_st {
443
+ struct cudaLaunchParams *launchParamsList;
444
+ unsigned int numDevices;
445
+ unsigned int flags;
446
+ } cudaLaunchCooperativeKernelMultiDevice_v9000_params;
447
+
448
+ typedef struct cudaFuncSetCacheConfig_v3020_params_st {
449
+ const void *func;
450
+ enum cudaFuncCache cacheConfig;
451
+ } cudaFuncSetCacheConfig_v3020_params;
452
+
453
+ typedef struct cudaFuncGetAttributes_v3020_params_st {
454
+ struct cudaFuncAttributes *attr;
455
+ const void *func;
456
+ } cudaFuncGetAttributes_v3020_params;
457
+
458
+ typedef struct cudaFuncSetAttribute_v9000_params_st {
459
+ const void *func;
460
+ enum cudaFuncAttribute attr;
461
+ int value;
462
+ } cudaFuncSetAttribute_v9000_params;
463
+
464
+ typedef struct cudaFuncGetName_v12030_params_st {
465
+ const char **name;
466
+ const void *func;
467
+ } cudaFuncGetName_v12030_params;
468
+
469
+ typedef struct cudaFuncGetParamInfo_v12040_params_st {
470
+ const void *func;
471
+ size_t paramIndex;
472
+ size_t *paramOffset;
473
+ size_t *paramSize;
474
+ } cudaFuncGetParamInfo_v12040_params;
475
+
476
+ typedef struct cudaLaunchHostFunc_ptsz_v10000_params_st {
477
+ cudaStream_t stream;
478
+ cudaHostFn_t fn;
479
+ void *userData;
480
+ } cudaLaunchHostFunc_ptsz_v10000_params;
481
+
482
+ typedef struct cudaFuncSetSharedMemConfig_v4020_params_st {
483
+ const void *func;
484
+ enum cudaSharedMemConfig config;
485
+ } cudaFuncSetSharedMemConfig_v4020_params;
486
+
487
+ typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params_st {
488
+ int *numBlocks;
489
+ const void *func;
490
+ int blockSize;
491
+ size_t dynamicSMemSize;
492
+ } cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params;
493
+
494
+ typedef struct cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params_st {
495
+ size_t *dynamicSmemSize;
496
+ const void *func;
497
+ int numBlocks;
498
+ int blockSize;
499
+ } cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params;
500
+
501
+ typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params_st {
502
+ int *numBlocks;
503
+ const void *func;
504
+ int blockSize;
505
+ size_t dynamicSMemSize;
506
+ unsigned int flags;
507
+ } cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params;
508
+
509
+ typedef struct cudaOccupancyMaxPotentialClusterSize_v11070_params_st {
510
+ int *clusterSize;
511
+ const void *func;
512
+ const cudaLaunchConfig_t *launchConfig;
513
+ } cudaOccupancyMaxPotentialClusterSize_v11070_params;
514
+
515
+ typedef struct cudaOccupancyMaxActiveClusters_v11070_params_st {
516
+ int *numClusters;
517
+ const void *func;
518
+ const cudaLaunchConfig_t *launchConfig;
519
+ } cudaOccupancyMaxActiveClusters_v11070_params;
520
+
521
+ typedef struct cudaMallocManaged_v6000_params_st {
522
+ void **devPtr;
523
+ size_t size;
524
+ unsigned int flags;
525
+ } cudaMallocManaged_v6000_params;
526
+
527
+ typedef struct cudaMalloc_v3020_params_st {
528
+ void **devPtr;
529
+ size_t size;
530
+ } cudaMalloc_v3020_params;
531
+
532
+ typedef struct cudaMallocHost_v3020_params_st {
533
+ void **ptr;
534
+ size_t size;
535
+ } cudaMallocHost_v3020_params;
536
+
537
+ typedef struct cudaMallocPitch_v3020_params_st {
538
+ void **devPtr;
539
+ size_t *pitch;
540
+ size_t width;
541
+ size_t height;
542
+ } cudaMallocPitch_v3020_params;
543
+
544
+ typedef struct cudaMallocArray_v3020_params_st {
545
+ cudaArray_t *array;
546
+ const struct cudaChannelFormatDesc *desc;
547
+ size_t width;
548
+ size_t height;
549
+ unsigned int flags;
550
+ } cudaMallocArray_v3020_params;
551
+
552
+ typedef struct cudaFree_v3020_params_st {
553
+ void *devPtr;
554
+ } cudaFree_v3020_params;
555
+
556
+ typedef struct cudaFreeHost_v3020_params_st {
557
+ void *ptr;
558
+ } cudaFreeHost_v3020_params;
559
+
560
+ typedef struct cudaFreeArray_v3020_params_st {
561
+ cudaArray_t array;
562
+ } cudaFreeArray_v3020_params;
563
+
564
+ typedef struct cudaFreeMipmappedArray_v5000_params_st {
565
+ cudaMipmappedArray_t mipmappedArray;
566
+ } cudaFreeMipmappedArray_v5000_params;
567
+
568
+ typedef struct cudaHostAlloc_v3020_params_st {
569
+ void **pHost;
570
+ size_t size;
571
+ unsigned int flags;
572
+ } cudaHostAlloc_v3020_params;
573
+
574
+ typedef struct cudaHostRegister_v4000_params_st {
575
+ void *ptr;
576
+ size_t size;
577
+ unsigned int flags;
578
+ } cudaHostRegister_v4000_params;
579
+
580
+ typedef struct cudaHostUnregister_v4000_params_st {
581
+ void *ptr;
582
+ } cudaHostUnregister_v4000_params;
583
+
584
+ typedef struct cudaHostGetDevicePointer_v3020_params_st {
585
+ void **pDevice;
586
+ void *pHost;
587
+ unsigned int flags;
588
+ } cudaHostGetDevicePointer_v3020_params;
589
+
590
+ typedef struct cudaHostGetFlags_v3020_params_st {
591
+ unsigned int *pFlags;
592
+ void *pHost;
593
+ } cudaHostGetFlags_v3020_params;
594
+
595
+ typedef struct cudaMalloc3D_v3020_params_st {
596
+ struct cudaPitchedPtr *pitchedDevPtr;
597
+ struct cudaExtent extent;
598
+ } cudaMalloc3D_v3020_params;
599
+
600
+ typedef struct cudaMalloc3DArray_v3020_params_st {
601
+ cudaArray_t *array;
602
+ const struct cudaChannelFormatDesc *desc;
603
+ struct cudaExtent extent;
604
+ unsigned int flags;
605
+ } cudaMalloc3DArray_v3020_params;
606
+
607
+ typedef struct cudaMallocMipmappedArray_v5000_params_st {
608
+ cudaMipmappedArray_t *mipmappedArray;
609
+ const struct cudaChannelFormatDesc *desc;
610
+ struct cudaExtent extent;
611
+ unsigned int numLevels;
612
+ unsigned int flags;
613
+ } cudaMallocMipmappedArray_v5000_params;
614
+
615
+ typedef struct cudaGetMipmappedArrayLevel_v5000_params_st {
616
+ cudaArray_t *levelArray;
617
+ cudaMipmappedArray_const_t mipmappedArray;
618
+ unsigned int level;
619
+ } cudaGetMipmappedArrayLevel_v5000_params;
620
+
621
+ typedef struct cudaMemcpy3D_ptds_v7000_params_st {
622
+ const struct cudaMemcpy3DParms *p;
623
+ } cudaMemcpy3D_ptds_v7000_params;
624
+
625
+ typedef struct cudaMemcpy3DPeer_ptds_v7000_params_st {
626
+ const struct cudaMemcpy3DPeerParms *p;
627
+ } cudaMemcpy3DPeer_ptds_v7000_params;
628
+
629
+ typedef struct cudaMemcpy3DAsync_ptsz_v7000_params_st {
630
+ const struct cudaMemcpy3DParms *p;
631
+ cudaStream_t stream;
632
+ } cudaMemcpy3DAsync_ptsz_v7000_params;
633
+
634
+ typedef struct cudaMemcpy3DPeerAsync_ptsz_v7000_params_st {
635
+ const struct cudaMemcpy3DPeerParms *p;
636
+ cudaStream_t stream;
637
+ } cudaMemcpy3DPeerAsync_ptsz_v7000_params;
638
+
639
+ typedef struct cudaMemGetInfo_v3020_params_st {
640
+ size_t *free;
641
+ size_t *total;
642
+ } cudaMemGetInfo_v3020_params;
643
+
644
+ typedef struct cudaArrayGetInfo_v4010_params_st {
645
+ struct cudaChannelFormatDesc *desc;
646
+ struct cudaExtent *extent;
647
+ unsigned int *flags;
648
+ cudaArray_t array;
649
+ } cudaArrayGetInfo_v4010_params;
650
+
651
+ typedef struct cudaArrayGetPlane_v11020_params_st {
652
+ cudaArray_t *pPlaneArray;
653
+ cudaArray_t hArray;
654
+ unsigned int planeIdx;
655
+ } cudaArrayGetPlane_v11020_params;
656
+
657
+ typedef struct cudaArrayGetMemoryRequirements_v11060_params_st {
658
+ struct cudaArrayMemoryRequirements *memoryRequirements;
659
+ cudaArray_t array;
660
+ int device;
661
+ } cudaArrayGetMemoryRequirements_v11060_params;
662
+
663
+ typedef struct cudaMipmappedArrayGetMemoryRequirements_v11060_params_st {
664
+ struct cudaArrayMemoryRequirements *memoryRequirements;
665
+ cudaMipmappedArray_t mipmap;
666
+ int device;
667
+ } cudaMipmappedArrayGetMemoryRequirements_v11060_params;
668
+
669
+ typedef struct cudaArrayGetSparseProperties_v11010_params_st {
670
+ struct cudaArraySparseProperties *sparseProperties;
671
+ cudaArray_t array;
672
+ } cudaArrayGetSparseProperties_v11010_params;
673
+
674
+ typedef struct cudaMipmappedArrayGetSparseProperties_v11010_params_st {
675
+ struct cudaArraySparseProperties *sparseProperties;
676
+ cudaMipmappedArray_t mipmap;
677
+ } cudaMipmappedArrayGetSparseProperties_v11010_params;
678
+
679
+ typedef struct cudaMemcpy_ptds_v7000_params_st {
680
+ void *dst;
681
+ const void *src;
682
+ size_t count;
683
+ enum cudaMemcpyKind kind;
684
+ } cudaMemcpy_ptds_v7000_params;
685
+
686
+ typedef struct cudaMemcpyPeer_v4000_params_st {
687
+ void *dst;
688
+ int dstDevice;
689
+ const void *src;
690
+ int srcDevice;
691
+ size_t count;
692
+ } cudaMemcpyPeer_v4000_params;
693
+
694
+ typedef struct cudaMemcpy2D_ptds_v7000_params_st {
695
+ void *dst;
696
+ size_t dpitch;
697
+ const void *src;
698
+ size_t spitch;
699
+ size_t width;
700
+ size_t height;
701
+ enum cudaMemcpyKind kind;
702
+ } cudaMemcpy2D_ptds_v7000_params;
703
+
704
+ typedef struct cudaMemcpy2DToArray_ptds_v7000_params_st {
705
+ cudaArray_t dst;
706
+ size_t wOffset;
707
+ size_t hOffset;
708
+ const void *src;
709
+ size_t spitch;
710
+ size_t width;
711
+ size_t height;
712
+ enum cudaMemcpyKind kind;
713
+ } cudaMemcpy2DToArray_ptds_v7000_params;
714
+
715
+ typedef struct cudaMemcpy2DFromArray_ptds_v7000_params_st {
716
+ void *dst;
717
+ size_t dpitch;
718
+ cudaArray_const_t src;
719
+ size_t wOffset;
720
+ size_t hOffset;
721
+ size_t width;
722
+ size_t height;
723
+ enum cudaMemcpyKind kind;
724
+ } cudaMemcpy2DFromArray_ptds_v7000_params;
725
+
726
+ typedef struct cudaMemcpy2DArrayToArray_ptds_v7000_params_st {
727
+ cudaArray_t dst;
728
+ size_t wOffsetDst;
729
+ size_t hOffsetDst;
730
+ cudaArray_const_t src;
731
+ size_t wOffsetSrc;
732
+ size_t hOffsetSrc;
733
+ size_t width;
734
+ size_t height;
735
+ enum cudaMemcpyKind kind;
736
+ } cudaMemcpy2DArrayToArray_ptds_v7000_params;
737
+
738
+ typedef struct cudaMemcpyToSymbol_ptds_v7000_params_st {
739
+ const void *symbol;
740
+ const void *src;
741
+ size_t count;
742
+ size_t offset;
743
+ enum cudaMemcpyKind kind;
744
+ } cudaMemcpyToSymbol_ptds_v7000_params;
745
+
746
+ typedef struct cudaMemcpyFromSymbol_ptds_v7000_params_st {
747
+ void *dst;
748
+ const void *symbol;
749
+ size_t count;
750
+ size_t offset;
751
+ enum cudaMemcpyKind kind;
752
+ } cudaMemcpyFromSymbol_ptds_v7000_params;
753
+
754
+ typedef struct cudaMemcpyAsync_ptsz_v7000_params_st {
755
+ void *dst;
756
+ const void *src;
757
+ size_t count;
758
+ enum cudaMemcpyKind kind;
759
+ cudaStream_t stream;
760
+ } cudaMemcpyAsync_ptsz_v7000_params;
761
+
762
+ typedef struct cudaMemcpyPeerAsync_v4000_params_st {
763
+ void *dst;
764
+ int dstDevice;
765
+ const void *src;
766
+ int srcDevice;
767
+ size_t count;
768
+ cudaStream_t stream;
769
+ } cudaMemcpyPeerAsync_v4000_params;
770
+
771
+ typedef struct cudaMemcpyBatchAsync_ptsz_v12080_params_st {
772
+ void **dsts;
773
+ void **srcs;
774
+ size_t *sizes;
775
+ size_t count;
776
+ struct cudaMemcpyAttributes *attrs;
777
+ size_t *attrsIdxs;
778
+ size_t numAttrs;
779
+ size_t *failIdx;
780
+ cudaStream_t stream;
781
+ } cudaMemcpyBatchAsync_ptsz_v12080_params;
782
+
783
+ typedef struct cudaMemcpy3DBatchAsync_ptsz_v12080_params_st {
784
+ size_t numOps;
785
+ struct cudaMemcpy3DBatchOp *opList;
786
+ size_t *failIdx;
787
+ unsigned long long flags;
788
+ cudaStream_t stream;
789
+ } cudaMemcpy3DBatchAsync_ptsz_v12080_params;
790
+
791
+ typedef struct cudaMemcpy2DAsync_ptsz_v7000_params_st {
792
+ void *dst;
793
+ size_t dpitch;
794
+ const void *src;
795
+ size_t spitch;
796
+ size_t width;
797
+ size_t height;
798
+ enum cudaMemcpyKind kind;
799
+ cudaStream_t stream;
800
+ } cudaMemcpy2DAsync_ptsz_v7000_params;
801
+
802
+ typedef struct cudaMemcpy2DToArrayAsync_ptsz_v7000_params_st {
803
+ cudaArray_t dst;
804
+ size_t wOffset;
805
+ size_t hOffset;
806
+ const void *src;
807
+ size_t spitch;
808
+ size_t width;
809
+ size_t height;
810
+ enum cudaMemcpyKind kind;
811
+ cudaStream_t stream;
812
+ } cudaMemcpy2DToArrayAsync_ptsz_v7000_params;
813
+
814
+ typedef struct cudaMemcpy2DFromArrayAsync_ptsz_v7000_params_st {
815
+ void *dst;
816
+ size_t dpitch;
817
+ cudaArray_const_t src;
818
+ size_t wOffset;
819
+ size_t hOffset;
820
+ size_t width;
821
+ size_t height;
822
+ enum cudaMemcpyKind kind;
823
+ cudaStream_t stream;
824
+ } cudaMemcpy2DFromArrayAsync_ptsz_v7000_params;
825
+
826
+ typedef struct cudaMemcpyToSymbolAsync_ptsz_v7000_params_st {
827
+ const void *symbol;
828
+ const void *src;
829
+ size_t count;
830
+ size_t offset;
831
+ enum cudaMemcpyKind kind;
832
+ cudaStream_t stream;
833
+ } cudaMemcpyToSymbolAsync_ptsz_v7000_params;
834
+
835
+ typedef struct cudaMemcpyFromSymbolAsync_ptsz_v7000_params_st {
836
+ void *dst;
837
+ const void *symbol;
838
+ size_t count;
839
+ size_t offset;
840
+ enum cudaMemcpyKind kind;
841
+ cudaStream_t stream;
842
+ } cudaMemcpyFromSymbolAsync_ptsz_v7000_params;
843
+
844
+ typedef struct cudaMemset_ptds_v7000_params_st {
845
+ void *devPtr;
846
+ int value;
847
+ size_t count;
848
+ } cudaMemset_ptds_v7000_params;
849
+
850
+ typedef struct cudaMemset2D_ptds_v7000_params_st {
851
+ void *devPtr;
852
+ size_t pitch;
853
+ int value;
854
+ size_t width;
855
+ size_t height;
856
+ } cudaMemset2D_ptds_v7000_params;
857
+
858
+ typedef struct cudaMemset3D_ptds_v7000_params_st {
859
+ struct cudaPitchedPtr pitchedDevPtr;
860
+ int value;
861
+ struct cudaExtent extent;
862
+ } cudaMemset3D_ptds_v7000_params;
863
+
864
+ typedef struct cudaMemsetAsync_ptsz_v7000_params_st {
865
+ void *devPtr;
866
+ int value;
867
+ size_t count;
868
+ cudaStream_t stream;
869
+ } cudaMemsetAsync_ptsz_v7000_params;
870
+
871
+ typedef struct cudaMemset2DAsync_ptsz_v7000_params_st {
872
+ void *devPtr;
873
+ size_t pitch;
874
+ int value;
875
+ size_t width;
876
+ size_t height;
877
+ cudaStream_t stream;
878
+ } cudaMemset2DAsync_ptsz_v7000_params;
879
+
880
+ typedef struct cudaMemset3DAsync_ptsz_v7000_params_st {
881
+ struct cudaPitchedPtr pitchedDevPtr;
882
+ int value;
883
+ struct cudaExtent extent;
884
+ cudaStream_t stream;
885
+ } cudaMemset3DAsync_ptsz_v7000_params;
886
+
887
+ typedef struct cudaGetSymbolAddress_v3020_params_st {
888
+ void **devPtr;
889
+ const void *symbol;
890
+ } cudaGetSymbolAddress_v3020_params;
891
+
892
+ typedef struct cudaGetSymbolSize_v3020_params_st {
893
+ size_t *size;
894
+ const void *symbol;
895
+ } cudaGetSymbolSize_v3020_params;
896
+
897
+ typedef struct cudaMemPrefetchAsync_ptsz_v8000_params_st {
898
+ const void *devPtr;
899
+ size_t count;
900
+ int dstDevice;
901
+ cudaStream_t stream;
902
+ } cudaMemPrefetchAsync_ptsz_v8000_params;
903
+
904
+ typedef struct cudaMemPrefetchAsync_v2_ptsz_v12020_params_st {
905
+ const void *devPtr;
906
+ size_t count;
907
+ struct cudaMemLocation location;
908
+ unsigned int flags;
909
+ cudaStream_t stream;
910
+ } cudaMemPrefetchAsync_v2_ptsz_v12020_params;
911
+
912
+ typedef struct cudaMemAdvise_v8000_params_st {
913
+ const void *devPtr;
914
+ size_t count;
915
+ enum cudaMemoryAdvise advice;
916
+ int device;
917
+ } cudaMemAdvise_v8000_params;
918
+
919
+ typedef struct cudaMemAdvise_v2_v12020_params_st {
920
+ const void *devPtr;
921
+ size_t count;
922
+ enum cudaMemoryAdvise advice;
923
+ struct cudaMemLocation location;
924
+ } cudaMemAdvise_v2_v12020_params;
925
+
926
+ typedef struct cudaMemRangeGetAttribute_v8000_params_st {
927
+ void *data;
928
+ size_t dataSize;
929
+ enum cudaMemRangeAttribute attribute;
930
+ const void *devPtr;
931
+ size_t count;
932
+ } cudaMemRangeGetAttribute_v8000_params;
933
+
934
+ typedef struct cudaMemRangeGetAttributes_v8000_params_st {
935
+ void **data;
936
+ size_t *dataSizes;
937
+ enum cudaMemRangeAttribute *attributes;
938
+ size_t numAttributes;
939
+ const void *devPtr;
940
+ size_t count;
941
+ } cudaMemRangeGetAttributes_v8000_params;
942
+
943
+ typedef struct cudaMemcpyToArray_ptds_v7000_params_st {
944
+ cudaArray_t dst;
945
+ size_t wOffset;
946
+ size_t hOffset;
947
+ const void *src;
948
+ size_t count;
949
+ enum cudaMemcpyKind kind;
950
+ } cudaMemcpyToArray_ptds_v7000_params;
951
+
952
+ typedef struct cudaMemcpyFromArray_ptds_v7000_params_st {
953
+ void *dst;
954
+ cudaArray_const_t src;
955
+ size_t wOffset;
956
+ size_t hOffset;
957
+ size_t count;
958
+ enum cudaMemcpyKind kind;
959
+ } cudaMemcpyFromArray_ptds_v7000_params;
960
+
961
+ typedef struct cudaMemcpyArrayToArray_ptds_v7000_params_st {
962
+ cudaArray_t dst;
963
+ size_t wOffsetDst;
964
+ size_t hOffsetDst;
965
+ cudaArray_const_t src;
966
+ size_t wOffsetSrc;
967
+ size_t hOffsetSrc;
968
+ size_t count;
969
+ enum cudaMemcpyKind kind;
970
+ } cudaMemcpyArrayToArray_ptds_v7000_params;
971
+
972
+ typedef struct cudaMemcpyToArrayAsync_ptsz_v7000_params_st {
973
+ cudaArray_t dst;
974
+ size_t wOffset;
975
+ size_t hOffset;
976
+ const void *src;
977
+ size_t count;
978
+ enum cudaMemcpyKind kind;
979
+ cudaStream_t stream;
980
+ } cudaMemcpyToArrayAsync_ptsz_v7000_params;
981
+
982
+ typedef struct cudaMemcpyFromArrayAsync_ptsz_v7000_params_st {
983
+ void *dst;
984
+ cudaArray_const_t src;
985
+ size_t wOffset;
986
+ size_t hOffset;
987
+ size_t count;
988
+ enum cudaMemcpyKind kind;
989
+ cudaStream_t stream;
990
+ } cudaMemcpyFromArrayAsync_ptsz_v7000_params;
991
+
992
+ typedef struct cudaMallocAsync_ptsz_v11020_params_st {
993
+ void **devPtr;
994
+ size_t size;
995
+ cudaStream_t hStream;
996
+ } cudaMallocAsync_ptsz_v11020_params;
997
+
998
+ typedef struct cudaFreeAsync_ptsz_v11020_params_st {
999
+ void *devPtr;
1000
+ cudaStream_t hStream;
1001
+ } cudaFreeAsync_ptsz_v11020_params;
1002
+
1003
+ typedef struct cudaMemPoolTrimTo_v11020_params_st {
1004
+ cudaMemPool_t memPool;
1005
+ size_t minBytesToKeep;
1006
+ } cudaMemPoolTrimTo_v11020_params;
1007
+
1008
+ typedef struct cudaMemPoolSetAttribute_v11020_params_st {
1009
+ cudaMemPool_t memPool;
1010
+ enum cudaMemPoolAttr attr;
1011
+ void *value;
1012
+ } cudaMemPoolSetAttribute_v11020_params;
1013
+
1014
+ typedef struct cudaMemPoolGetAttribute_v11020_params_st {
1015
+ cudaMemPool_t memPool;
1016
+ enum cudaMemPoolAttr attr;
1017
+ void *value;
1018
+ } cudaMemPoolGetAttribute_v11020_params;
1019
+
1020
+ typedef struct cudaMemPoolSetAccess_v11020_params_st {
1021
+ cudaMemPool_t memPool;
1022
+ const struct cudaMemAccessDesc *descList;
1023
+ size_t count;
1024
+ } cudaMemPoolSetAccess_v11020_params;
1025
+
1026
+ typedef struct cudaMemPoolGetAccess_v11020_params_st {
1027
+ enum cudaMemAccessFlags *flags;
1028
+ cudaMemPool_t memPool;
1029
+ struct cudaMemLocation *location;
1030
+ } cudaMemPoolGetAccess_v11020_params;
1031
+
1032
+ typedef struct cudaMemPoolCreate_v11020_params_st {
1033
+ cudaMemPool_t *memPool;
1034
+ const struct cudaMemPoolProps *poolProps;
1035
+ } cudaMemPoolCreate_v11020_params;
1036
+
1037
+ typedef struct cudaMemPoolDestroy_v11020_params_st {
1038
+ cudaMemPool_t memPool;
1039
+ } cudaMemPoolDestroy_v11020_params;
1040
+
1041
+ typedef struct cudaMallocFromPoolAsync_ptsz_v11020_params_st {
1042
+ void **ptr;
1043
+ size_t size;
1044
+ cudaMemPool_t memPool;
1045
+ cudaStream_t stream;
1046
+ } cudaMallocFromPoolAsync_ptsz_v11020_params;
1047
+
1048
+ typedef struct cudaMemPoolExportToShareableHandle_v11020_params_st {
1049
+ void *shareableHandle;
1050
+ cudaMemPool_t memPool;
1051
+ enum cudaMemAllocationHandleType handleType;
1052
+ unsigned int flags;
1053
+ } cudaMemPoolExportToShareableHandle_v11020_params;
1054
+
1055
+ typedef struct cudaMemPoolImportFromShareableHandle_v11020_params_st {
1056
+ cudaMemPool_t *memPool;
1057
+ void *shareableHandle;
1058
+ enum cudaMemAllocationHandleType handleType;
1059
+ unsigned int flags;
1060
+ } cudaMemPoolImportFromShareableHandle_v11020_params;
1061
+
1062
+ typedef struct cudaMemPoolExportPointer_v11020_params_st {
1063
+ struct cudaMemPoolPtrExportData *exportData;
1064
+ void *ptr;
1065
+ } cudaMemPoolExportPointer_v11020_params;
1066
+
1067
+ typedef struct cudaMemPoolImportPointer_v11020_params_st {
1068
+ void **ptr;
1069
+ cudaMemPool_t memPool;
1070
+ struct cudaMemPoolPtrExportData *exportData;
1071
+ } cudaMemPoolImportPointer_v11020_params;
1072
+
1073
+ typedef struct cudaPointerGetAttributes_v4000_params_st {
1074
+ struct cudaPointerAttributes *attributes;
1075
+ const void *ptr;
1076
+ } cudaPointerGetAttributes_v4000_params;
1077
+
1078
+ typedef struct cudaDeviceCanAccessPeer_v4000_params_st {
1079
+ int *canAccessPeer;
1080
+ int device;
1081
+ int peerDevice;
1082
+ } cudaDeviceCanAccessPeer_v4000_params;
1083
+
1084
+ typedef struct cudaDeviceEnablePeerAccess_v4000_params_st {
1085
+ int peerDevice;
1086
+ unsigned int flags;
1087
+ } cudaDeviceEnablePeerAccess_v4000_params;
1088
+
1089
+ typedef struct cudaDeviceDisablePeerAccess_v4000_params_st {
1090
+ int peerDevice;
1091
+ } cudaDeviceDisablePeerAccess_v4000_params;
1092
+
1093
+ typedef struct cudaGraphicsUnregisterResource_v3020_params_st {
1094
+ cudaGraphicsResource_t resource;
1095
+ } cudaGraphicsUnregisterResource_v3020_params;
1096
+
1097
+ typedef struct cudaGraphicsResourceSetMapFlags_v3020_params_st {
1098
+ cudaGraphicsResource_t resource;
1099
+ unsigned int flags;
1100
+ } cudaGraphicsResourceSetMapFlags_v3020_params;
1101
+
1102
+ typedef struct cudaGraphicsMapResources_v3020_params_st {
1103
+ int count;
1104
+ cudaGraphicsResource_t *resources;
1105
+ cudaStream_t stream;
1106
+ } cudaGraphicsMapResources_v3020_params;
1107
+
1108
+ typedef struct cudaGraphicsUnmapResources_v3020_params_st {
1109
+ int count;
1110
+ cudaGraphicsResource_t *resources;
1111
+ cudaStream_t stream;
1112
+ } cudaGraphicsUnmapResources_v3020_params;
1113
+
1114
+ typedef struct cudaGraphicsResourceGetMappedPointer_v3020_params_st {
1115
+ void **devPtr;
1116
+ size_t *size;
1117
+ cudaGraphicsResource_t resource;
1118
+ } cudaGraphicsResourceGetMappedPointer_v3020_params;
1119
+
1120
+ typedef struct cudaGraphicsSubResourceGetMappedArray_v3020_params_st {
1121
+ cudaArray_t *array;
1122
+ cudaGraphicsResource_t resource;
1123
+ unsigned int arrayIndex;
1124
+ unsigned int mipLevel;
1125
+ } cudaGraphicsSubResourceGetMappedArray_v3020_params;
1126
+
1127
+ typedef struct cudaGraphicsResourceGetMappedMipmappedArray_v5000_params_st {
1128
+ cudaMipmappedArray_t *mipmappedArray;
1129
+ cudaGraphicsResource_t resource;
1130
+ } cudaGraphicsResourceGetMappedMipmappedArray_v5000_params;
1131
+
1132
+ typedef struct cudaGetChannelDesc_v3020_params_st {
1133
+ struct cudaChannelFormatDesc *desc;
1134
+ cudaArray_const_t array;
1135
+ } cudaGetChannelDesc_v3020_params;
1136
+
1137
+ typedef struct cudaCreateChannelDesc_v3020_params_st {
1138
+ int x;
1139
+ int y;
1140
+ int z;
1141
+ int w;
1142
+ enum cudaChannelFormatKind f;
1143
+ } cudaCreateChannelDesc_v3020_params;
1144
+
1145
+ typedef struct cudaCreateTextureObject_v5000_params_st {
1146
+ cudaTextureObject_t *pTexObject;
1147
+ const struct cudaResourceDesc *pResDesc;
1148
+ const struct cudaTextureDesc *pTexDesc;
1149
+ const struct cudaResourceViewDesc *pResViewDesc;
1150
+ } cudaCreateTextureObject_v5000_params;
1151
+
1152
+ typedef struct cudaDestroyTextureObject_v5000_params_st {
1153
+ cudaTextureObject_t texObject;
1154
+ } cudaDestroyTextureObject_v5000_params;
1155
+
1156
+ typedef struct cudaGetTextureObjectResourceDesc_v5000_params_st {
1157
+ struct cudaResourceDesc *pResDesc;
1158
+ cudaTextureObject_t texObject;
1159
+ } cudaGetTextureObjectResourceDesc_v5000_params;
1160
+
1161
+ typedef struct cudaGetTextureObjectTextureDesc_v5000_params_st {
1162
+ struct cudaTextureDesc *pTexDesc;
1163
+ cudaTextureObject_t texObject;
1164
+ } cudaGetTextureObjectTextureDesc_v5000_params;
1165
+
1166
+ typedef struct cudaGetTextureObjectResourceViewDesc_v5000_params_st {
1167
+ struct cudaResourceViewDesc *pResViewDesc;
1168
+ cudaTextureObject_t texObject;
1169
+ } cudaGetTextureObjectResourceViewDesc_v5000_params;
1170
+
1171
+ typedef struct cudaCreateSurfaceObject_v5000_params_st {
1172
+ cudaSurfaceObject_t *pSurfObject;
1173
+ const struct cudaResourceDesc *pResDesc;
1174
+ } cudaCreateSurfaceObject_v5000_params;
1175
+
1176
+ typedef struct cudaDestroySurfaceObject_v5000_params_st {
1177
+ cudaSurfaceObject_t surfObject;
1178
+ } cudaDestroySurfaceObject_v5000_params;
1179
+
1180
+ typedef struct cudaGetSurfaceObjectResourceDesc_v5000_params_st {
1181
+ struct cudaResourceDesc *pResDesc;
1182
+ cudaSurfaceObject_t surfObject;
1183
+ } cudaGetSurfaceObjectResourceDesc_v5000_params;
1184
+
1185
+ typedef struct cudaDriverGetVersion_v3020_params_st {
1186
+ int *driverVersion;
1187
+ } cudaDriverGetVersion_v3020_params;
1188
+
1189
+ typedef struct cudaRuntimeGetVersion_v3020_params_st {
1190
+ int *runtimeVersion;
1191
+ } cudaRuntimeGetVersion_v3020_params;
1192
+
1193
+ typedef struct cudaGraphCreate_v10000_params_st {
1194
+ cudaGraph_t *pGraph;
1195
+ unsigned int flags;
1196
+ } cudaGraphCreate_v10000_params;
1197
+
1198
+ typedef struct cudaGraphAddKernelNode_v10000_params_st {
1199
+ cudaGraphNode_t *pGraphNode;
1200
+ cudaGraph_t graph;
1201
+ const cudaGraphNode_t *pDependencies;
1202
+ size_t numDependencies;
1203
+ const struct cudaKernelNodeParams *pNodeParams;
1204
+ } cudaGraphAddKernelNode_v10000_params;
1205
+
1206
+ typedef struct cudaGraphKernelNodeGetParams_v10000_params_st {
1207
+ cudaGraphNode_t node;
1208
+ struct cudaKernelNodeParams *pNodeParams;
1209
+ } cudaGraphKernelNodeGetParams_v10000_params;
1210
+
1211
+ typedef struct cudaGraphKernelNodeSetParams_v10000_params_st {
1212
+ cudaGraphNode_t node;
1213
+ const struct cudaKernelNodeParams *pNodeParams;
1214
+ } cudaGraphKernelNodeSetParams_v10000_params;
1215
+
1216
+ typedef struct cudaGraphKernelNodeCopyAttributes_v11000_params_st {
1217
+ cudaGraphNode_t hSrc;
1218
+ cudaGraphNode_t hDst;
1219
+ } cudaGraphKernelNodeCopyAttributes_v11000_params;
1220
+
1221
+ typedef struct cudaGraphKernelNodeGetAttribute_v11000_params_st {
1222
+ cudaGraphNode_t hNode;
1223
+ cudaKernelNodeAttrID attr;
1224
+ cudaKernelNodeAttrValue *value_out;
1225
+ } cudaGraphKernelNodeGetAttribute_v11000_params;
1226
+
1227
+ typedef struct cudaGraphKernelNodeSetAttribute_v11000_params_st {
1228
+ cudaGraphNode_t hNode;
1229
+ cudaKernelNodeAttrID attr;
1230
+ const cudaKernelNodeAttrValue *value;
1231
+ } cudaGraphKernelNodeSetAttribute_v11000_params;
1232
+
1233
+ typedef struct cudaGraphAddMemcpyNode_v10000_params_st {
1234
+ cudaGraphNode_t *pGraphNode;
1235
+ cudaGraph_t graph;
1236
+ const cudaGraphNode_t *pDependencies;
1237
+ size_t numDependencies;
1238
+ const struct cudaMemcpy3DParms *pCopyParams;
1239
+ } cudaGraphAddMemcpyNode_v10000_params;
1240
+
1241
+ typedef struct cudaGraphAddMemcpyNodeToSymbol_v11010_params_st {
1242
+ cudaGraphNode_t *pGraphNode;
1243
+ cudaGraph_t graph;
1244
+ const cudaGraphNode_t *pDependencies;
1245
+ size_t numDependencies;
1246
+ const void *symbol;
1247
+ const void *src;
1248
+ size_t count;
1249
+ size_t offset;
1250
+ enum cudaMemcpyKind kind;
1251
+ } cudaGraphAddMemcpyNodeToSymbol_v11010_params;
1252
+
1253
+ typedef struct cudaGraphAddMemcpyNodeFromSymbol_v11010_params_st {
1254
+ cudaGraphNode_t *pGraphNode;
1255
+ cudaGraph_t graph;
1256
+ const cudaGraphNode_t *pDependencies;
1257
+ size_t numDependencies;
1258
+ void *dst;
1259
+ const void *symbol;
1260
+ size_t count;
1261
+ size_t offset;
1262
+ enum cudaMemcpyKind kind;
1263
+ } cudaGraphAddMemcpyNodeFromSymbol_v11010_params;
1264
+
1265
+ typedef struct cudaGraphAddMemcpyNode1D_v11010_params_st {
1266
+ cudaGraphNode_t *pGraphNode;
1267
+ cudaGraph_t graph;
1268
+ const cudaGraphNode_t *pDependencies;
1269
+ size_t numDependencies;
1270
+ void *dst;
1271
+ const void *src;
1272
+ size_t count;
1273
+ enum cudaMemcpyKind kind;
1274
+ } cudaGraphAddMemcpyNode1D_v11010_params;
1275
+
1276
+ typedef struct cudaGraphMemcpyNodeGetParams_v10000_params_st {
1277
+ cudaGraphNode_t node;
1278
+ struct cudaMemcpy3DParms *pNodeParams;
1279
+ } cudaGraphMemcpyNodeGetParams_v10000_params;
1280
+
1281
+ typedef struct cudaGraphMemcpyNodeSetParams_v10000_params_st {
1282
+ cudaGraphNode_t node;
1283
+ const struct cudaMemcpy3DParms *pNodeParams;
1284
+ } cudaGraphMemcpyNodeSetParams_v10000_params;
1285
+
1286
+ typedef struct cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params_st {
1287
+ cudaGraphNode_t node;
1288
+ const void *symbol;
1289
+ const void *src;
1290
+ size_t count;
1291
+ size_t offset;
1292
+ enum cudaMemcpyKind kind;
1293
+ } cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params;
1294
+
1295
+ typedef struct cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params_st {
1296
+ cudaGraphNode_t node;
1297
+ void *dst;
1298
+ const void *symbol;
1299
+ size_t count;
1300
+ size_t offset;
1301
+ enum cudaMemcpyKind kind;
1302
+ } cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params;
1303
+
1304
+ typedef struct cudaGraphMemcpyNodeSetParams1D_v11010_params_st {
1305
+ cudaGraphNode_t node;
1306
+ void *dst;
1307
+ const void *src;
1308
+ size_t count;
1309
+ enum cudaMemcpyKind kind;
1310
+ } cudaGraphMemcpyNodeSetParams1D_v11010_params;
1311
+
1312
+ typedef struct cudaGraphAddMemsetNode_v10000_params_st {
1313
+ cudaGraphNode_t *pGraphNode;
1314
+ cudaGraph_t graph;
1315
+ const cudaGraphNode_t *pDependencies;
1316
+ size_t numDependencies;
1317
+ const struct cudaMemsetParams *pMemsetParams;
1318
+ } cudaGraphAddMemsetNode_v10000_params;
1319
+
1320
+ typedef struct cudaGraphMemsetNodeGetParams_v10000_params_st {
1321
+ cudaGraphNode_t node;
1322
+ struct cudaMemsetParams *pNodeParams;
1323
+ } cudaGraphMemsetNodeGetParams_v10000_params;
1324
+
1325
+ typedef struct cudaGraphMemsetNodeSetParams_v10000_params_st {
1326
+ cudaGraphNode_t node;
1327
+ const struct cudaMemsetParams *pNodeParams;
1328
+ } cudaGraphMemsetNodeSetParams_v10000_params;
1329
+
1330
+ typedef struct cudaGraphAddHostNode_v10000_params_st {
1331
+ cudaGraphNode_t *pGraphNode;
1332
+ cudaGraph_t graph;
1333
+ const cudaGraphNode_t *pDependencies;
1334
+ size_t numDependencies;
1335
+ const struct cudaHostNodeParams *pNodeParams;
1336
+ } cudaGraphAddHostNode_v10000_params;
1337
+
1338
+ typedef struct cudaGraphHostNodeGetParams_v10000_params_st {
1339
+ cudaGraphNode_t node;
1340
+ struct cudaHostNodeParams *pNodeParams;
1341
+ } cudaGraphHostNodeGetParams_v10000_params;
1342
+
1343
+ typedef struct cudaGraphHostNodeSetParams_v10000_params_st {
1344
+ cudaGraphNode_t node;
1345
+ const struct cudaHostNodeParams *pNodeParams;
1346
+ } cudaGraphHostNodeSetParams_v10000_params;
1347
+
1348
+ typedef struct cudaGraphAddChildGraphNode_v10000_params_st {
1349
+ cudaGraphNode_t *pGraphNode;
1350
+ cudaGraph_t graph;
1351
+ const cudaGraphNode_t *pDependencies;
1352
+ size_t numDependencies;
1353
+ cudaGraph_t childGraph;
1354
+ } cudaGraphAddChildGraphNode_v10000_params;
1355
+
1356
+ typedef struct cudaGraphChildGraphNodeGetGraph_v10000_params_st {
1357
+ cudaGraphNode_t node;
1358
+ cudaGraph_t *pGraph;
1359
+ } cudaGraphChildGraphNodeGetGraph_v10000_params;
1360
+
1361
+ typedef struct cudaGraphAddEmptyNode_v10000_params_st {
1362
+ cudaGraphNode_t *pGraphNode;
1363
+ cudaGraph_t graph;
1364
+ const cudaGraphNode_t *pDependencies;
1365
+ size_t numDependencies;
1366
+ } cudaGraphAddEmptyNode_v10000_params;
1367
+
1368
+ typedef struct cudaGraphAddEventRecordNode_v11010_params_st {
1369
+ cudaGraphNode_t *pGraphNode;
1370
+ cudaGraph_t graph;
1371
+ const cudaGraphNode_t *pDependencies;
1372
+ size_t numDependencies;
1373
+ cudaEvent_t event;
1374
+ } cudaGraphAddEventRecordNode_v11010_params;
1375
+
1376
+ typedef struct cudaGraphEventRecordNodeGetEvent_v11010_params_st {
1377
+ cudaGraphNode_t node;
1378
+ cudaEvent_t *event_out;
1379
+ } cudaGraphEventRecordNodeGetEvent_v11010_params;
1380
+
1381
+ typedef struct cudaGraphEventRecordNodeSetEvent_v11010_params_st {
1382
+ cudaGraphNode_t node;
1383
+ cudaEvent_t event;
1384
+ } cudaGraphEventRecordNodeSetEvent_v11010_params;
1385
+
1386
+ typedef struct cudaGraphAddEventWaitNode_v11010_params_st {
1387
+ cudaGraphNode_t *pGraphNode;
1388
+ cudaGraph_t graph;
1389
+ const cudaGraphNode_t *pDependencies;
1390
+ size_t numDependencies;
1391
+ cudaEvent_t event;
1392
+ } cudaGraphAddEventWaitNode_v11010_params;
1393
+
1394
+ typedef struct cudaGraphEventWaitNodeGetEvent_v11010_params_st {
1395
+ cudaGraphNode_t node;
1396
+ cudaEvent_t *event_out;
1397
+ } cudaGraphEventWaitNodeGetEvent_v11010_params;
1398
+
1399
+ typedef struct cudaGraphEventWaitNodeSetEvent_v11010_params_st {
1400
+ cudaGraphNode_t node;
1401
+ cudaEvent_t event;
1402
+ } cudaGraphEventWaitNodeSetEvent_v11010_params;
1403
+
1404
+ typedef struct cudaGraphAddExternalSemaphoresSignalNode_v11020_params_st {
1405
+ cudaGraphNode_t *pGraphNode;
1406
+ cudaGraph_t graph;
1407
+ const cudaGraphNode_t *pDependencies;
1408
+ size_t numDependencies;
1409
+ const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
1410
+ } cudaGraphAddExternalSemaphoresSignalNode_v11020_params;
1411
+
1412
+ typedef struct cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params_st {
1413
+ cudaGraphNode_t hNode;
1414
+ struct cudaExternalSemaphoreSignalNodeParams *params_out;
1415
+ } cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params;
1416
+
1417
+ typedef struct cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params_st {
1418
+ cudaGraphNode_t hNode;
1419
+ const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
1420
+ } cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params;
1421
+
1422
+ typedef struct cudaGraphAddExternalSemaphoresWaitNode_v11020_params_st {
1423
+ cudaGraphNode_t *pGraphNode;
1424
+ cudaGraph_t graph;
1425
+ const cudaGraphNode_t *pDependencies;
1426
+ size_t numDependencies;
1427
+ const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
1428
+ } cudaGraphAddExternalSemaphoresWaitNode_v11020_params;
1429
+
1430
+ typedef struct cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params_st {
1431
+ cudaGraphNode_t hNode;
1432
+ struct cudaExternalSemaphoreWaitNodeParams *params_out;
1433
+ } cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params;
1434
+
1435
+ typedef struct cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params_st {
1436
+ cudaGraphNode_t hNode;
1437
+ const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
1438
+ } cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params;
1439
+
1440
+ typedef struct cudaGraphAddMemAllocNode_v11040_params_st {
1441
+ cudaGraphNode_t *pGraphNode;
1442
+ cudaGraph_t graph;
1443
+ const cudaGraphNode_t *pDependencies;
1444
+ size_t numDependencies;
1445
+ struct cudaMemAllocNodeParams *nodeParams;
1446
+ } cudaGraphAddMemAllocNode_v11040_params;
1447
+
1448
+ typedef struct cudaGraphMemAllocNodeGetParams_v11040_params_st {
1449
+ cudaGraphNode_t node;
1450
+ struct cudaMemAllocNodeParams *params_out;
1451
+ } cudaGraphMemAllocNodeGetParams_v11040_params;
1452
+
1453
+ typedef struct cudaGraphAddMemFreeNode_v11040_params_st {
1454
+ cudaGraphNode_t *pGraphNode;
1455
+ cudaGraph_t graph;
1456
+ const cudaGraphNode_t *pDependencies;
1457
+ size_t numDependencies;
1458
+ void *dptr;
1459
+ } cudaGraphAddMemFreeNode_v11040_params;
1460
+
1461
+ typedef struct cudaGraphMemFreeNodeGetParams_v11040_params_st {
1462
+ cudaGraphNode_t node;
1463
+ void *dptr_out;
1464
+ } cudaGraphMemFreeNodeGetParams_v11040_params;
1465
+
1466
+ typedef struct cudaDeviceGraphMemTrim_v11040_params_st {
1467
+ int device;
1468
+ } cudaDeviceGraphMemTrim_v11040_params;
1469
+
1470
+ typedef struct cudaDeviceGetGraphMemAttribute_v11040_params_st {
1471
+ int device;
1472
+ enum cudaGraphMemAttributeType attr;
1473
+ void *value;
1474
+ } cudaDeviceGetGraphMemAttribute_v11040_params;
1475
+
1476
+ typedef struct cudaDeviceSetGraphMemAttribute_v11040_params_st {
1477
+ int device;
1478
+ enum cudaGraphMemAttributeType attr;
1479
+ void *value;
1480
+ } cudaDeviceSetGraphMemAttribute_v11040_params;
1481
+
1482
+ typedef struct cudaGraphClone_v10000_params_st {
1483
+ cudaGraph_t *pGraphClone;
1484
+ cudaGraph_t originalGraph;
1485
+ } cudaGraphClone_v10000_params;
1486
+
1487
+ typedef struct cudaGraphNodeFindInClone_v10000_params_st {
1488
+ cudaGraphNode_t *pNode;
1489
+ cudaGraphNode_t originalNode;
1490
+ cudaGraph_t clonedGraph;
1491
+ } cudaGraphNodeFindInClone_v10000_params;
1492
+
1493
+ typedef struct cudaGraphNodeGetType_v10000_params_st {
1494
+ cudaGraphNode_t node;
1495
+ enum cudaGraphNodeType *pType;
1496
+ } cudaGraphNodeGetType_v10000_params;
1497
+
1498
+ typedef struct cudaGraphGetNodes_v10000_params_st {
1499
+ cudaGraph_t graph;
1500
+ cudaGraphNode_t *nodes;
1501
+ size_t *numNodes;
1502
+ } cudaGraphGetNodes_v10000_params;
1503
+
1504
+ typedef struct cudaGraphGetRootNodes_v10000_params_st {
1505
+ cudaGraph_t graph;
1506
+ cudaGraphNode_t *pRootNodes;
1507
+ size_t *pNumRootNodes;
1508
+ } cudaGraphGetRootNodes_v10000_params;
1509
+
1510
+ typedef struct cudaGraphGetEdges_v10000_params_st {
1511
+ cudaGraph_t graph;
1512
+ cudaGraphNode_t *from;
1513
+ cudaGraphNode_t *to;
1514
+ size_t *numEdges;
1515
+ } cudaGraphGetEdges_v10000_params;
1516
+
1517
+ typedef struct cudaGraphGetEdges_v2_v12030_params_st {
1518
+ cudaGraph_t graph;
1519
+ cudaGraphNode_t *from;
1520
+ cudaGraphNode_t *to;
1521
+ cudaGraphEdgeData *edgeData;
1522
+ size_t *numEdges;
1523
+ } cudaGraphGetEdges_v2_v12030_params;
1524
+
1525
+ typedef struct cudaGraphNodeGetDependencies_v10000_params_st {
1526
+ cudaGraphNode_t node;
1527
+ cudaGraphNode_t *pDependencies;
1528
+ size_t *pNumDependencies;
1529
+ } cudaGraphNodeGetDependencies_v10000_params;
1530
+
1531
+ typedef struct cudaGraphNodeGetDependencies_v2_v12030_params_st {
1532
+ cudaGraphNode_t node;
1533
+ cudaGraphNode_t *pDependencies;
1534
+ cudaGraphEdgeData *edgeData;
1535
+ size_t *pNumDependencies;
1536
+ } cudaGraphNodeGetDependencies_v2_v12030_params;
1537
+
1538
+ typedef struct cudaGraphNodeGetDependentNodes_v10000_params_st {
1539
+ cudaGraphNode_t node;
1540
+ cudaGraphNode_t *pDependentNodes;
1541
+ size_t *pNumDependentNodes;
1542
+ } cudaGraphNodeGetDependentNodes_v10000_params;
1543
+
1544
+ typedef struct cudaGraphNodeGetDependentNodes_v2_v12030_params_st {
1545
+ cudaGraphNode_t node;
1546
+ cudaGraphNode_t *pDependentNodes;
1547
+ cudaGraphEdgeData *edgeData;
1548
+ size_t *pNumDependentNodes;
1549
+ } cudaGraphNodeGetDependentNodes_v2_v12030_params;
1550
+
1551
+ typedef struct cudaGraphAddDependencies_v10000_params_st {
1552
+ cudaGraph_t graph;
1553
+ const cudaGraphNode_t *from;
1554
+ const cudaGraphNode_t *to;
1555
+ size_t numDependencies;
1556
+ } cudaGraphAddDependencies_v10000_params;
1557
+
1558
+ typedef struct cudaGraphAddDependencies_v2_v12030_params_st {
1559
+ cudaGraph_t graph;
1560
+ const cudaGraphNode_t *from;
1561
+ const cudaGraphNode_t *to;
1562
+ const cudaGraphEdgeData *edgeData;
1563
+ size_t numDependencies;
1564
+ } cudaGraphAddDependencies_v2_v12030_params;
1565
+
1566
+ typedef struct cudaGraphRemoveDependencies_v10000_params_st {
1567
+ cudaGraph_t graph;
1568
+ const cudaGraphNode_t *from;
1569
+ const cudaGraphNode_t *to;
1570
+ size_t numDependencies;
1571
+ } cudaGraphRemoveDependencies_v10000_params;
1572
+
1573
+ typedef struct cudaGraphRemoveDependencies_v2_v12030_params_st {
1574
+ cudaGraph_t graph;
1575
+ const cudaGraphNode_t *from;
1576
+ const cudaGraphNode_t *to;
1577
+ const cudaGraphEdgeData *edgeData;
1578
+ size_t numDependencies;
1579
+ } cudaGraphRemoveDependencies_v2_v12030_params;
1580
+
1581
+ typedef struct cudaGraphDestroyNode_v10000_params_st {
1582
+ cudaGraphNode_t node;
1583
+ } cudaGraphDestroyNode_v10000_params;
1584
+
1585
+ typedef struct cudaGraphInstantiate_v12000_params_st {
1586
+ cudaGraphExec_t *pGraphExec;
1587
+ cudaGraph_t graph;
1588
+ unsigned long long flags;
1589
+ } cudaGraphInstantiate_v12000_params;
1590
+
1591
+ typedef struct cudaGraphInstantiateWithFlags_v11040_params_st {
1592
+ cudaGraphExec_t *pGraphExec;
1593
+ cudaGraph_t graph;
1594
+ unsigned long long flags;
1595
+ } cudaGraphInstantiateWithFlags_v11040_params;
1596
+
1597
+ typedef struct cudaGraphInstantiateWithParams_ptsz_v12000_params_st {
1598
+ cudaGraphExec_t *pGraphExec;
1599
+ cudaGraph_t graph;
1600
+ cudaGraphInstantiateParams *instantiateParams;
1601
+ } cudaGraphInstantiateWithParams_ptsz_v12000_params;
1602
+
1603
+ typedef struct cudaGraphExecGetFlags_v12000_params_st {
1604
+ cudaGraphExec_t graphExec;
1605
+ unsigned long long *flags;
1606
+ } cudaGraphExecGetFlags_v12000_params;
1607
+
1608
+ typedef struct cudaGraphExecKernelNodeSetParams_v10010_params_st {
1609
+ cudaGraphExec_t hGraphExec;
1610
+ cudaGraphNode_t node;
1611
+ const struct cudaKernelNodeParams *pNodeParams;
1612
+ } cudaGraphExecKernelNodeSetParams_v10010_params;
1613
+
1614
+ typedef struct cudaGraphExecMemcpyNodeSetParams_v10020_params_st {
1615
+ cudaGraphExec_t hGraphExec;
1616
+ cudaGraphNode_t node;
1617
+ const struct cudaMemcpy3DParms *pNodeParams;
1618
+ } cudaGraphExecMemcpyNodeSetParams_v10020_params;
1619
+
1620
+ typedef struct cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params_st {
1621
+ cudaGraphExec_t hGraphExec;
1622
+ cudaGraphNode_t node;
1623
+ const void *symbol;
1624
+ const void *src;
1625
+ size_t count;
1626
+ size_t offset;
1627
+ enum cudaMemcpyKind kind;
1628
+ } cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params;
1629
+
1630
+ typedef struct cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params_st {
1631
+ cudaGraphExec_t hGraphExec;
1632
+ cudaGraphNode_t node;
1633
+ void *dst;
1634
+ const void *symbol;
1635
+ size_t count;
1636
+ size_t offset;
1637
+ enum cudaMemcpyKind kind;
1638
+ } cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params;
1639
+
1640
+ typedef struct cudaGraphExecMemcpyNodeSetParams1D_v11010_params_st {
1641
+ cudaGraphExec_t hGraphExec;
1642
+ cudaGraphNode_t node;
1643
+ void *dst;
1644
+ const void *src;
1645
+ size_t count;
1646
+ enum cudaMemcpyKind kind;
1647
+ } cudaGraphExecMemcpyNodeSetParams1D_v11010_params;
1648
+
1649
+ typedef struct cudaGraphExecMemsetNodeSetParams_v10020_params_st {
1650
+ cudaGraphExec_t hGraphExec;
1651
+ cudaGraphNode_t node;
1652
+ const struct cudaMemsetParams *pNodeParams;
1653
+ } cudaGraphExecMemsetNodeSetParams_v10020_params;
1654
+
1655
+ typedef struct cudaGraphExecHostNodeSetParams_v10020_params_st {
1656
+ cudaGraphExec_t hGraphExec;
1657
+ cudaGraphNode_t node;
1658
+ const struct cudaHostNodeParams *pNodeParams;
1659
+ } cudaGraphExecHostNodeSetParams_v10020_params;
1660
+
1661
+ typedef struct cudaGraphExecChildGraphNodeSetParams_v11010_params_st {
1662
+ cudaGraphExec_t hGraphExec;
1663
+ cudaGraphNode_t node;
1664
+ cudaGraph_t childGraph;
1665
+ } cudaGraphExecChildGraphNodeSetParams_v11010_params;
1666
+
1667
+ typedef struct cudaGraphExecEventRecordNodeSetEvent_v11010_params_st {
1668
+ cudaGraphExec_t hGraphExec;
1669
+ cudaGraphNode_t hNode;
1670
+ cudaEvent_t event;
1671
+ } cudaGraphExecEventRecordNodeSetEvent_v11010_params;
1672
+
1673
+ typedef struct cudaGraphExecEventWaitNodeSetEvent_v11010_params_st {
1674
+ cudaGraphExec_t hGraphExec;
1675
+ cudaGraphNode_t hNode;
1676
+ cudaEvent_t event;
1677
+ } cudaGraphExecEventWaitNodeSetEvent_v11010_params;
1678
+
1679
+ typedef struct cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params_st {
1680
+ cudaGraphExec_t hGraphExec;
1681
+ cudaGraphNode_t hNode;
1682
+ const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
1683
+ } cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params;
1684
+
1685
+ typedef struct cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params_st {
1686
+ cudaGraphExec_t hGraphExec;
1687
+ cudaGraphNode_t hNode;
1688
+ const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
1689
+ } cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params;
1690
+
1691
+ typedef struct cudaGraphNodeSetEnabled_v11060_params_st {
1692
+ cudaGraphExec_t hGraphExec;
1693
+ cudaGraphNode_t hNode;
1694
+ unsigned int isEnabled;
1695
+ } cudaGraphNodeSetEnabled_v11060_params;
1696
+
1697
+ typedef struct cudaGraphNodeGetEnabled_v11060_params_st {
1698
+ cudaGraphExec_t hGraphExec;
1699
+ cudaGraphNode_t hNode;
1700
+ unsigned int *isEnabled;
1701
+ } cudaGraphNodeGetEnabled_v11060_params;
1702
+
1703
+ typedef struct cudaGraphExecUpdate_v10020_params_st {
1704
+ cudaGraphExec_t hGraphExec;
1705
+ cudaGraph_t hGraph;
1706
+ cudaGraphExecUpdateResultInfo *resultInfo;
1707
+ } cudaGraphExecUpdate_v10020_params;
1708
+
1709
+ typedef struct cudaGraphUpload_ptsz_v10000_params_st {
1710
+ cudaGraphExec_t graphExec;
1711
+ cudaStream_t stream;
1712
+ } cudaGraphUpload_ptsz_v10000_params;
1713
+
1714
+ typedef struct cudaGraphLaunch_ptsz_v10000_params_st {
1715
+ cudaGraphExec_t graphExec;
1716
+ cudaStream_t stream;
1717
+ } cudaGraphLaunch_ptsz_v10000_params;
1718
+
1719
+ typedef struct cudaGraphExecDestroy_v10000_params_st {
1720
+ cudaGraphExec_t graphExec;
1721
+ } cudaGraphExecDestroy_v10000_params;
1722
+
1723
+ typedef struct cudaGraphDestroy_v10000_params_st {
1724
+ cudaGraph_t graph;
1725
+ } cudaGraphDestroy_v10000_params;
1726
+
1727
+ typedef struct cudaGraphDebugDotPrint_v11030_params_st {
1728
+ cudaGraph_t graph;
1729
+ const char *path;
1730
+ unsigned int flags;
1731
+ } cudaGraphDebugDotPrint_v11030_params;
1732
+
1733
+ typedef struct cudaUserObjectCreate_v11030_params_st {
1734
+ cudaUserObject_t *object_out;
1735
+ void *ptr;
1736
+ cudaHostFn_t destroy;
1737
+ unsigned int initialRefcount;
1738
+ unsigned int flags;
1739
+ } cudaUserObjectCreate_v11030_params;
1740
+
1741
+ typedef struct cudaUserObjectRetain_v11030_params_st {
1742
+ cudaUserObject_t object;
1743
+ unsigned int count;
1744
+ } cudaUserObjectRetain_v11030_params;
1745
+
1746
+ typedef struct cudaUserObjectRelease_v11030_params_st {
1747
+ cudaUserObject_t object;
1748
+ unsigned int count;
1749
+ } cudaUserObjectRelease_v11030_params;
1750
+
1751
+ typedef struct cudaGraphRetainUserObject_v11030_params_st {
1752
+ cudaGraph_t graph;
1753
+ cudaUserObject_t object;
1754
+ unsigned int count;
1755
+ unsigned int flags;
1756
+ } cudaGraphRetainUserObject_v11030_params;
1757
+
1758
+ typedef struct cudaGraphReleaseUserObject_v11030_params_st {
1759
+ cudaGraph_t graph;
1760
+ cudaUserObject_t object;
1761
+ unsigned int count;
1762
+ } cudaGraphReleaseUserObject_v11030_params;
1763
+
1764
+ typedef struct cudaGraphAddNode_v12020_params_st {
1765
+ cudaGraphNode_t *pGraphNode;
1766
+ cudaGraph_t graph;
1767
+ const cudaGraphNode_t *pDependencies;
1768
+ size_t numDependencies;
1769
+ struct cudaGraphNodeParams *nodeParams;
1770
+ } cudaGraphAddNode_v12020_params;
1771
+
1772
+ typedef struct cudaGraphAddNode_v2_v12030_params_st {
1773
+ cudaGraphNode_t *pGraphNode;
1774
+ cudaGraph_t graph;
1775
+ const cudaGraphNode_t *pDependencies;
1776
+ const cudaGraphEdgeData *dependencyData;
1777
+ size_t numDependencies;
1778
+ struct cudaGraphNodeParams *nodeParams;
1779
+ } cudaGraphAddNode_v2_v12030_params;
1780
+
1781
+ typedef struct cudaGraphNodeSetParams_v12020_params_st {
1782
+ cudaGraphNode_t node;
1783
+ struct cudaGraphNodeParams *nodeParams;
1784
+ } cudaGraphNodeSetParams_v12020_params;
1785
+
1786
+ typedef struct cudaGraphExecNodeSetParams_v12020_params_st {
1787
+ cudaGraphExec_t graphExec;
1788
+ cudaGraphNode_t node;
1789
+ struct cudaGraphNodeParams *nodeParams;
1790
+ } cudaGraphExecNodeSetParams_v12020_params;
1791
+
1792
+ typedef struct cudaGraphConditionalHandleCreate_v12030_params_st {
1793
+ cudaGraphConditionalHandle *pHandle_out;
1794
+ cudaGraph_t graph;
1795
+ unsigned int defaultLaunchValue;
1796
+ unsigned int flags;
1797
+ } cudaGraphConditionalHandleCreate_v12030_params;
1798
+
1799
+ typedef struct cudaGetDriverEntryPoint_ptsz_v11030_params_st {
1800
+ const char *symbol;
1801
+ void **funcPtr;
1802
+ unsigned long long flags;
1803
+ enum cudaDriverEntryPointQueryResult *driverStatus;
1804
+ } cudaGetDriverEntryPoint_ptsz_v11030_params;
1805
+
1806
+ typedef struct cudaGetDriverEntryPointByVersion_ptsz_v12050_params_st {
1807
+ const char *symbol;
1808
+ void **funcPtr;
1809
+ unsigned int cudaVersion;
1810
+ unsigned long long flags;
1811
+ enum cudaDriverEntryPointQueryResult *driverStatus;
1812
+ } cudaGetDriverEntryPointByVersion_ptsz_v12050_params;
1813
+
1814
+ typedef struct cudaGetFuncBySymbol_v11000_params_st {
1815
+ cudaFunction_t *functionPtr;
1816
+ const void *symbolPtr;
1817
+ } cudaGetFuncBySymbol_v11000_params;
1818
+
1819
+ typedef struct cudaGetKernel_v12000_params_st {
1820
+ cudaKernel_t *kernelPtr;
1821
+ const void *entryFuncAddr;
1822
+ } cudaGetKernel_v12000_params;
1823
+
1824
+ typedef struct cudaMemcpy_v3020_params_st {
1825
+ void *dst;
1826
+ const void *src;
1827
+ size_t count;
1828
+ enum cudaMemcpyKind kind;
1829
+ } cudaMemcpy_v3020_params;
1830
+
1831
+ typedef struct cudaMemcpyToSymbol_v3020_params_st {
1832
+ const void *symbol;
1833
+ const void *src;
1834
+ size_t count;
1835
+ size_t offset;
1836
+ enum cudaMemcpyKind kind;
1837
+ } cudaMemcpyToSymbol_v3020_params;
1838
+
1839
+ typedef struct cudaMemcpyFromSymbol_v3020_params_st {
1840
+ void *dst;
1841
+ const void *symbol;
1842
+ size_t count;
1843
+ size_t offset;
1844
+ enum cudaMemcpyKind kind;
1845
+ } cudaMemcpyFromSymbol_v3020_params;
1846
+
1847
+ typedef struct cudaMemcpy2D_v3020_params_st {
1848
+ void *dst;
1849
+ size_t dpitch;
1850
+ const void *src;
1851
+ size_t spitch;
1852
+ size_t width;
1853
+ size_t height;
1854
+ enum cudaMemcpyKind kind;
1855
+ } cudaMemcpy2D_v3020_params;
1856
+
1857
+ typedef struct cudaMemcpyToArray_v3020_params_st {
1858
+ cudaArray_t dst;
1859
+ size_t wOffset;
1860
+ size_t hOffset;
1861
+ const void *src;
1862
+ size_t count;
1863
+ enum cudaMemcpyKind kind;
1864
+ } cudaMemcpyToArray_v3020_params;
1865
+
1866
+ typedef struct cudaMemcpy2DToArray_v3020_params_st {
1867
+ cudaArray_t dst;
1868
+ size_t wOffset;
1869
+ size_t hOffset;
1870
+ const void *src;
1871
+ size_t spitch;
1872
+ size_t width;
1873
+ size_t height;
1874
+ enum cudaMemcpyKind kind;
1875
+ } cudaMemcpy2DToArray_v3020_params;
1876
+
1877
+ typedef struct cudaMemcpyFromArray_v3020_params_st {
1878
+ void *dst;
1879
+ cudaArray_const_t src;
1880
+ size_t wOffset;
1881
+ size_t hOffset;
1882
+ size_t count;
1883
+ enum cudaMemcpyKind kind;
1884
+ } cudaMemcpyFromArray_v3020_params;
1885
+
1886
+ typedef struct cudaMemcpy2DFromArray_v3020_params_st {
1887
+ void *dst;
1888
+ size_t dpitch;
1889
+ cudaArray_const_t src;
1890
+ size_t wOffset;
1891
+ size_t hOffset;
1892
+ size_t width;
1893
+ size_t height;
1894
+ enum cudaMemcpyKind kind;
1895
+ } cudaMemcpy2DFromArray_v3020_params;
1896
+
1897
+ typedef struct cudaMemcpyArrayToArray_v3020_params_st {
1898
+ cudaArray_t dst;
1899
+ size_t wOffsetDst;
1900
+ size_t hOffsetDst;
1901
+ cudaArray_const_t src;
1902
+ size_t wOffsetSrc;
1903
+ size_t hOffsetSrc;
1904
+ size_t count;
1905
+ enum cudaMemcpyKind kind;
1906
+ } cudaMemcpyArrayToArray_v3020_params;
1907
+
1908
+ typedef struct cudaMemcpy2DArrayToArray_v3020_params_st {
1909
+ cudaArray_t dst;
1910
+ size_t wOffsetDst;
1911
+ size_t hOffsetDst;
1912
+ cudaArray_const_t src;
1913
+ size_t wOffsetSrc;
1914
+ size_t hOffsetSrc;
1915
+ size_t width;
1916
+ size_t height;
1917
+ enum cudaMemcpyKind kind;
1918
+ } cudaMemcpy2DArrayToArray_v3020_params;
1919
+
1920
+ typedef struct cudaMemcpy3D_v3020_params_st {
1921
+ const struct cudaMemcpy3DParms *p;
1922
+ } cudaMemcpy3D_v3020_params;
1923
+
1924
+ typedef struct cudaMemcpy3DPeer_v4000_params_st {
1925
+ const struct cudaMemcpy3DPeerParms *p;
1926
+ } cudaMemcpy3DPeer_v4000_params;
1927
+
1928
+ typedef struct cudaMemcpyBatchAsync_v12080_params_st {
1929
+ void **dsts;
1930
+ void **srcs;
1931
+ size_t *sizes;
1932
+ size_t count;
1933
+ struct cudaMemcpyAttributes *attrs;
1934
+ size_t *attrsIdxs;
1935
+ size_t numAttrs;
1936
+ size_t *failIdx;
1937
+ cudaStream_t stream;
1938
+ } cudaMemcpyBatchAsync_v12080_params;
1939
+
1940
+ typedef struct cudaMemcpy3DBatchAsync_v12080_params_st {
1941
+ size_t numOps;
1942
+ struct cudaMemcpy3DBatchOp *opList;
1943
+ size_t *failIdx;
1944
+ unsigned long long flags;
1945
+ cudaStream_t stream;
1946
+ } cudaMemcpy3DBatchAsync_v12080_params;
1947
+
1948
+ typedef struct cudaMemset_v3020_params_st {
1949
+ void *devPtr;
1950
+ int value;
1951
+ size_t count;
1952
+ } cudaMemset_v3020_params;
1953
+
1954
+ typedef struct cudaMemset2D_v3020_params_st {
1955
+ void *devPtr;
1956
+ size_t pitch;
1957
+ int value;
1958
+ size_t width;
1959
+ size_t height;
1960
+ } cudaMemset2D_v3020_params;
1961
+
1962
+ typedef struct cudaMemset3D_v3020_params_st {
1963
+ struct cudaPitchedPtr pitchedDevPtr;
1964
+ int value;
1965
+ struct cudaExtent extent;
1966
+ } cudaMemset3D_v3020_params;
1967
+
1968
+ typedef struct cudaMemcpyAsync_v3020_params_st {
1969
+ void *dst;
1970
+ const void *src;
1971
+ size_t count;
1972
+ enum cudaMemcpyKind kind;
1973
+ cudaStream_t stream;
1974
+ } cudaMemcpyAsync_v3020_params;
1975
+
1976
+ typedef struct cudaMemcpyToSymbolAsync_v3020_params_st {
1977
+ const void *symbol;
1978
+ const void *src;
1979
+ size_t count;
1980
+ size_t offset;
1981
+ enum cudaMemcpyKind kind;
1982
+ cudaStream_t stream;
1983
+ } cudaMemcpyToSymbolAsync_v3020_params;
1984
+
1985
+ typedef struct cudaMemcpyFromSymbolAsync_v3020_params_st {
1986
+ void *dst;
1987
+ const void *symbol;
1988
+ size_t count;
1989
+ size_t offset;
1990
+ enum cudaMemcpyKind kind;
1991
+ cudaStream_t stream;
1992
+ } cudaMemcpyFromSymbolAsync_v3020_params;
1993
+
1994
+ typedef struct cudaMemcpy2DAsync_v3020_params_st {
1995
+ void *dst;
1996
+ size_t dpitch;
1997
+ const void *src;
1998
+ size_t spitch;
1999
+ size_t width;
2000
+ size_t height;
2001
+ enum cudaMemcpyKind kind;
2002
+ cudaStream_t stream;
2003
+ } cudaMemcpy2DAsync_v3020_params;
2004
+
2005
+ typedef struct cudaMemcpyToArrayAsync_v3020_params_st {
2006
+ cudaArray_t dst;
2007
+ size_t wOffset;
2008
+ size_t hOffset;
2009
+ const void *src;
2010
+ size_t count;
2011
+ enum cudaMemcpyKind kind;
2012
+ cudaStream_t stream;
2013
+ } cudaMemcpyToArrayAsync_v3020_params;
2014
+
2015
+ typedef struct cudaMemcpy2DToArrayAsync_v3020_params_st {
2016
+ cudaArray_t dst;
2017
+ size_t wOffset;
2018
+ size_t hOffset;
2019
+ const void *src;
2020
+ size_t spitch;
2021
+ size_t width;
2022
+ size_t height;
2023
+ enum cudaMemcpyKind kind;
2024
+ cudaStream_t stream;
2025
+ } cudaMemcpy2DToArrayAsync_v3020_params;
2026
+
2027
+ typedef struct cudaMemcpyFromArrayAsync_v3020_params_st {
2028
+ void *dst;
2029
+ cudaArray_const_t src;
2030
+ size_t wOffset;
2031
+ size_t hOffset;
2032
+ size_t count;
2033
+ enum cudaMemcpyKind kind;
2034
+ cudaStream_t stream;
2035
+ } cudaMemcpyFromArrayAsync_v3020_params;
2036
+
2037
+ typedef struct cudaMemcpy2DFromArrayAsync_v3020_params_st {
2038
+ void *dst;
2039
+ size_t dpitch;
2040
+ cudaArray_const_t src;
2041
+ size_t wOffset;
2042
+ size_t hOffset;
2043
+ size_t width;
2044
+ size_t height;
2045
+ enum cudaMemcpyKind kind;
2046
+ cudaStream_t stream;
2047
+ } cudaMemcpy2DFromArrayAsync_v3020_params;
2048
+
2049
+ typedef struct cudaMemcpy3DAsync_v3020_params_st {
2050
+ const struct cudaMemcpy3DParms *p;
2051
+ cudaStream_t stream;
2052
+ } cudaMemcpy3DAsync_v3020_params;
2053
+
2054
+ typedef struct cudaMemcpy3DPeerAsync_v4000_params_st {
2055
+ const struct cudaMemcpy3DPeerParms *p;
2056
+ cudaStream_t stream;
2057
+ } cudaMemcpy3DPeerAsync_v4000_params;
2058
+
2059
+ typedef struct cudaMemsetAsync_v3020_params_st {
2060
+ void *devPtr;
2061
+ int value;
2062
+ size_t count;
2063
+ cudaStream_t stream;
2064
+ } cudaMemsetAsync_v3020_params;
2065
+
2066
+ typedef struct cudaMemset2DAsync_v3020_params_st {
2067
+ void *devPtr;
2068
+ size_t pitch;
2069
+ int value;
2070
+ size_t width;
2071
+ size_t height;
2072
+ cudaStream_t stream;
2073
+ } cudaMemset2DAsync_v3020_params;
2074
+
2075
+ typedef struct cudaMemset3DAsync_v3020_params_st {
2076
+ struct cudaPitchedPtr pitchedDevPtr;
2077
+ int value;
2078
+ struct cudaExtent extent;
2079
+ cudaStream_t stream;
2080
+ } cudaMemset3DAsync_v3020_params;
2081
+
2082
+ typedef struct cudaStreamQuery_v3020_params_st {
2083
+ cudaStream_t stream;
2084
+ } cudaStreamQuery_v3020_params;
2085
+
2086
+ typedef struct cudaStreamGetDevice_v12080_params_st {
2087
+ cudaStream_t hStream;
2088
+ int *device;
2089
+ } cudaStreamGetDevice_v12080_params;
2090
+
2091
+ typedef struct cudaStreamGetFlags_v5050_params_st {
2092
+ cudaStream_t hStream;
2093
+ unsigned int *flags;
2094
+ } cudaStreamGetFlags_v5050_params;
2095
+
2096
+ typedef struct cudaStreamGetId_v12000_params_st {
2097
+ cudaStream_t hStream;
2098
+ unsigned long long *streamId;
2099
+ } cudaStreamGetId_v12000_params;
2100
+
2101
+ typedef struct cudaStreamGetPriority_v5050_params_st {
2102
+ cudaStream_t hStream;
2103
+ int *priority;
2104
+ } cudaStreamGetPriority_v5050_params;
2105
+
2106
+ typedef struct cudaEventRecord_v3020_params_st {
2107
+ cudaEvent_t event;
2108
+ cudaStream_t stream;
2109
+ } cudaEventRecord_v3020_params;
2110
+
2111
+ typedef struct cudaEventRecordWithFlags_v11010_params_st {
2112
+ cudaEvent_t event;
2113
+ cudaStream_t stream;
2114
+ unsigned int flags;
2115
+ } cudaEventRecordWithFlags_v11010_params;
2116
+
2117
+ typedef struct cudaStreamWaitEvent_v3020_params_st {
2118
+ cudaStream_t stream;
2119
+ cudaEvent_t event;
2120
+ unsigned int flags;
2121
+ } cudaStreamWaitEvent_v3020_params;
2122
+
2123
+ typedef struct cudaStreamAddCallback_v5000_params_st {
2124
+ cudaStream_t stream;
2125
+ cudaStreamCallback_t callback;
2126
+ void *userData;
2127
+ unsigned int flags;
2128
+ } cudaStreamAddCallback_v5000_params;
2129
+
2130
+ typedef struct cudaStreamAttachMemAsync_v6000_params_st {
2131
+ cudaStream_t stream;
2132
+ void *devPtr;
2133
+ size_t length;
2134
+ unsigned int flags;
2135
+ } cudaStreamAttachMemAsync_v6000_params;
2136
+
2137
+ typedef struct cudaStreamSynchronize_v3020_params_st {
2138
+ cudaStream_t stream;
2139
+ } cudaStreamSynchronize_v3020_params;
2140
+
2141
+ typedef struct cudaLaunchKernel_v7000_params_st {
2142
+ const void *func;
2143
+ dim3 gridDim;
2144
+ dim3 blockDim;
2145
+ void **args;
2146
+ size_t sharedMem;
2147
+ cudaStream_t stream;
2148
+ } cudaLaunchKernel_v7000_params;
2149
+
2150
+ typedef struct cudaLaunchKernelExC_v11060_params_st {
2151
+ const cudaLaunchConfig_t *config;
2152
+ const void *func;
2153
+ void **args;
2154
+ } cudaLaunchKernelExC_v11060_params;
2155
+
2156
+ typedef struct cudaLaunchCooperativeKernel_v9000_params_st {
2157
+ const void *func;
2158
+ dim3 gridDim;
2159
+ dim3 blockDim;
2160
+ void **args;
2161
+ size_t sharedMem;
2162
+ cudaStream_t stream;
2163
+ } cudaLaunchCooperativeKernel_v9000_params;
2164
+
2165
+ typedef struct cudaLaunchHostFunc_v10000_params_st {
2166
+ cudaStream_t stream;
2167
+ cudaHostFn_t fn;
2168
+ void *userData;
2169
+ } cudaLaunchHostFunc_v10000_params;
2170
+
2171
+ typedef struct cudaMemPrefetchAsync_v8000_params_st {
2172
+ const void *devPtr;
2173
+ size_t count;
2174
+ int dstDevice;
2175
+ cudaStream_t stream;
2176
+ } cudaMemPrefetchAsync_v8000_params;
2177
+
2178
+ typedef struct cudaMemPrefetchAsync_v2_v12020_params_st {
2179
+ const void *devPtr;
2180
+ size_t count;
2181
+ struct cudaMemLocation location;
2182
+ unsigned int flags;
2183
+ cudaStream_t stream;
2184
+ } cudaMemPrefetchAsync_v2_v12020_params;
2185
+
2186
+ typedef struct cudaSignalExternalSemaphoresAsync_v10000_params_st {
2187
+ const cudaExternalSemaphore_t *extSemArray;
2188
+ const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
2189
+ unsigned int numExtSems;
2190
+ cudaStream_t stream;
2191
+ } cudaSignalExternalSemaphoresAsync_v10000_params;
2192
+
2193
+ typedef struct cudaSignalExternalSemaphoresAsync_ptsz_v10000_params_st {
2194
+ const cudaExternalSemaphore_t *extSemArray;
2195
+ const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
2196
+ unsigned int numExtSems;
2197
+ cudaStream_t stream;
2198
+ } cudaSignalExternalSemaphoresAsync_ptsz_v10000_params;
2199
+
2200
+ typedef struct cudaSignalExternalSemaphoresAsync_v2_v11020_params_st {
2201
+ const cudaExternalSemaphore_t *extSemArray;
2202
+ const struct cudaExternalSemaphoreSignalParams *paramsArray;
2203
+ unsigned int numExtSems;
2204
+ cudaStream_t stream;
2205
+ } cudaSignalExternalSemaphoresAsync_v2_v11020_params;
2206
+
2207
+ typedef struct cudaWaitExternalSemaphoresAsync_v10000_params_st {
2208
+ const cudaExternalSemaphore_t *extSemArray;
2209
+ const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
2210
+ unsigned int numExtSems;
2211
+ cudaStream_t stream;
2212
+ } cudaWaitExternalSemaphoresAsync_v10000_params;
2213
+
2214
+ typedef struct cudaWaitExternalSemaphoresAsync_ptsz_v10000_params_st {
2215
+ const cudaExternalSemaphore_t *extSemArray;
2216
+ const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
2217
+ unsigned int numExtSems;
2218
+ cudaStream_t stream;
2219
+ } cudaWaitExternalSemaphoresAsync_ptsz_v10000_params;
2220
+
2221
+ typedef struct cudaWaitExternalSemaphoresAsync_v2_v11020_params_st {
2222
+ const cudaExternalSemaphore_t *extSemArray;
2223
+ const struct cudaExternalSemaphoreWaitParams *paramsArray;
2224
+ unsigned int numExtSems;
2225
+ cudaStream_t stream;
2226
+ } cudaWaitExternalSemaphoresAsync_v2_v11020_params;
2227
+
2228
+ typedef struct cudaGraphInstantiateWithParams_v12000_params_st {
2229
+ cudaGraphExec_t *pGraphExec;
2230
+ cudaGraph_t graph;
2231
+ cudaGraphInstantiateParams *instantiateParams;
2232
+ } cudaGraphInstantiateWithParams_v12000_params;
2233
+
2234
+ typedef struct cudaGraphUpload_v10000_params_st {
2235
+ cudaGraphExec_t graphExec;
2236
+ cudaStream_t stream;
2237
+ } cudaGraphUpload_v10000_params;
2238
+
2239
+ typedef struct cudaGraphLaunch_v10000_params_st {
2240
+ cudaGraphExec_t graphExec;
2241
+ cudaStream_t stream;
2242
+ } cudaGraphLaunch_v10000_params;
2243
+
2244
+ typedef struct cudaStreamBeginCapture_v10000_params_st {
2245
+ cudaStream_t stream;
2246
+ enum cudaStreamCaptureMode mode;
2247
+ } cudaStreamBeginCapture_v10000_params;
2248
+
2249
+ typedef struct cudaStreamBeginCaptureToGraph_v12030_params_st {
2250
+ cudaStream_t stream;
2251
+ cudaGraph_t graph;
2252
+ const cudaGraphNode_t *dependencies;
2253
+ const cudaGraphEdgeData *dependencyData;
2254
+ size_t numDependencies;
2255
+ enum cudaStreamCaptureMode mode;
2256
+ } cudaStreamBeginCaptureToGraph_v12030_params;
2257
+
2258
+ typedef struct cudaStreamEndCapture_v10000_params_st {
2259
+ cudaStream_t stream;
2260
+ cudaGraph_t *pGraph;
2261
+ } cudaStreamEndCapture_v10000_params;
2262
+
2263
+ typedef struct cudaStreamIsCapturing_v10000_params_st {
2264
+ cudaStream_t stream;
2265
+ enum cudaStreamCaptureStatus *pCaptureStatus;
2266
+ } cudaStreamIsCapturing_v10000_params;
2267
+
2268
+ typedef struct cudaStreamGetCaptureInfo_v10010_params_st {
2269
+ cudaStream_t stream;
2270
+ enum cudaStreamCaptureStatus *captureStatus_out;
2271
+ unsigned long long *id_out;
2272
+ } cudaStreamGetCaptureInfo_v10010_params;
2273
+
2274
+ typedef struct cudaStreamGetCaptureInfo_ptsz_v10010_params_st {
2275
+ cudaStream_t stream;
2276
+ enum cudaStreamCaptureStatus *captureStatus_out;
2277
+ unsigned long long *id_out;
2278
+ } cudaStreamGetCaptureInfo_ptsz_v10010_params;
2279
+
2280
+ typedef struct cudaStreamGetCaptureInfo_v2_v11030_params_st {
2281
+ cudaStream_t stream;
2282
+ enum cudaStreamCaptureStatus *captureStatus_out;
2283
+ unsigned long long *id_out;
2284
+ cudaGraph_t *graph_out;
2285
+ const cudaGraphNode_t **dependencies_out;
2286
+ size_t *numDependencies_out;
2287
+ } cudaStreamGetCaptureInfo_v2_v11030_params;
2288
+
2289
+ typedef struct cudaStreamGetCaptureInfo_v3_v12030_params_st {
2290
+ cudaStream_t stream;
2291
+ enum cudaStreamCaptureStatus *captureStatus_out;
2292
+ unsigned long long *id_out;
2293
+ cudaGraph_t *graph_out;
2294
+ const cudaGraphNode_t **dependencies_out;
2295
+ const cudaGraphEdgeData **edgeData_out;
2296
+ size_t *numDependencies_out;
2297
+ } cudaStreamGetCaptureInfo_v3_v12030_params;
2298
+
2299
+ typedef struct cudaStreamUpdateCaptureDependencies_v11030_params_st {
2300
+ cudaStream_t stream;
2301
+ cudaGraphNode_t *dependencies;
2302
+ size_t numDependencies;
2303
+ unsigned int flags;
2304
+ } cudaStreamUpdateCaptureDependencies_v11030_params;
2305
+
2306
+ typedef struct cudaStreamUpdateCaptureDependencies_v2_v12030_params_st {
2307
+ cudaStream_t stream;
2308
+ cudaGraphNode_t *dependencies;
2309
+ const cudaGraphEdgeData *dependencyData;
2310
+ size_t numDependencies;
2311
+ unsigned int flags;
2312
+ } cudaStreamUpdateCaptureDependencies_v2_v12030_params;
2313
+
2314
+ typedef struct cudaStreamCopyAttributes_v11000_params_st {
2315
+ cudaStream_t dstStream;
2316
+ cudaStream_t srcStream;
2317
+ } cudaStreamCopyAttributes_v11000_params;
2318
+
2319
+ typedef struct cudaStreamGetAttribute_v11000_params_st {
2320
+ cudaStream_t stream;
2321
+ cudaStreamAttrID attr;
2322
+ cudaStreamAttrValue *value;
2323
+ } cudaStreamGetAttribute_v11000_params;
2324
+
2325
+ typedef struct cudaStreamSetAttribute_v11000_params_st {
2326
+ cudaStream_t stream;
2327
+ cudaStreamAttrID attr;
2328
+ const cudaStreamAttrValue *param;
2329
+ } cudaStreamSetAttribute_v11000_params;
2330
+
2331
+ typedef struct cudaMallocAsync_v11020_params_st {
2332
+ void **devPtr;
2333
+ size_t size;
2334
+ cudaStream_t hStream;
2335
+ } cudaMallocAsync_v11020_params;
2336
+
2337
+ typedef struct cudaFreeAsync_v11020_params_st {
2338
+ void *devPtr;
2339
+ cudaStream_t hStream;
2340
+ } cudaFreeAsync_v11020_params;
2341
+
2342
+ typedef struct cudaMallocFromPoolAsync_v11020_params_st {
2343
+ void **ptr;
2344
+ size_t size;
2345
+ cudaMemPool_t memPool;
2346
+ cudaStream_t stream;
2347
+ } cudaMallocFromPoolAsync_v11020_params;
2348
+
2349
+ typedef struct cudaGetDriverEntryPoint_v11030_params_st {
2350
+ const char *symbol;
2351
+ void **funcPtr;
2352
+ unsigned long long flags;
2353
+ enum cudaDriverEntryPointQueryResult *driverStatus;
2354
+ } cudaGetDriverEntryPoint_v11030_params;
2355
+
2356
+ typedef struct cudaGetDriverEntryPointByVersion_v12050_params_st {
2357
+ const char *symbol;
2358
+ void **funcPtr;
2359
+ unsigned int cudaVersion;
2360
+ unsigned long long flags;
2361
+ enum cudaDriverEntryPointQueryResult *driverStatus;
2362
+ } cudaGetDriverEntryPointByVersion_v12050_params;
2363
+
2364
+ typedef struct cudaGetDeviceProperties_v3020_params_st {
2365
+ struct cudaDeviceProp *prop;
2366
+ int device;
2367
+ } cudaGetDeviceProperties_v3020_params;
2368
+
2369
+ // Parameter trace structures for removed functions
2370
+
2371
+
2372
+ // End of parameter trace structures
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // CUDA public interface, for type definitions and api function prototypes
4
+ #include "cuda_vdpau_interop.h"
5
+
6
+ // *************************************************************************
7
+ // Definitions of structs to hold parameters for each function
8
+ // *************************************************************************
9
+
10
+ // Currently used parameter trace structures
11
+ typedef struct cudaVDPAUGetDevice_v3020_params_st {
12
+ int *device;
13
+ VdpDevice vdpDevice;
14
+ VdpGetProcAddress *vdpGetProcAddress;
15
+ } cudaVDPAUGetDevice_v3020_params;
16
+
17
+ typedef struct cudaVDPAUSetVDPAUDevice_v3020_params_st {
18
+ int device;
19
+ VdpDevice vdpDevice;
20
+ VdpGetProcAddress *vdpGetProcAddress;
21
+ } cudaVDPAUSetVDPAUDevice_v3020_params;
22
+
23
+ typedef struct cudaGraphicsVDPAURegisterVideoSurface_v3020_params_st {
24
+ struct cudaGraphicsResource **resource;
25
+ VdpVideoSurface vdpSurface;
26
+ unsigned int flags;
27
+ } cudaGraphicsVDPAURegisterVideoSurface_v3020_params;
28
+
29
+ typedef struct cudaGraphicsVDPAURegisterOutputSurface_v3020_params_st {
30
+ struct cudaGraphicsResource **resource;
31
+ VdpOutputSurface vdpSurface;
32
+ unsigned int flags;
33
+ } cudaGraphicsVDPAURegisterOutputSurface_v3020_params;
34
+
35
+ // Parameter trace structures for removed functions
36
+
37
+
38
+ // End of parameter trace structures
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_cudart_removed_meta.h ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // CUDA public interface, for type definitions and api function prototypes
4
+ #include "cudart_removed.h"
5
+
6
+ // *************************************************************************
7
+ // Definitions of structs to hold parameters for each function
8
+ // *************************************************************************
9
+
10
+ // Currently used parameter trace structures
11
+ typedef struct cudaStreamDestroy_v3020_params_st {
12
+ cudaStream_t stream;
13
+ } cudaStreamDestroy_v3020_params;
14
+
15
+ typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000_params_st {
16
+ int *numBlocks;
17
+ const void *func;
18
+ size_t numDynamicSmemBytes;
19
+ } cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000_params;
20
+
21
+ typedef struct cudaConfigureCall_v3020_params_st {
22
+ dim3 gridDim;
23
+ dim3 blockDim;
24
+ size_t sharedMem __dv;
25
+ cudaStream_t stream __dv;
26
+ } cudaConfigureCall_v3020_params;
27
+
28
+ typedef struct cudaSetupArgument_v3020_params_st {
29
+ const void *arg;
30
+ size_t size;
31
+ size_t offset;
32
+ } cudaSetupArgument_v3020_params;
33
+
34
+ typedef struct cudaLaunch_v3020_params_st {
35
+ const void *func;
36
+ } cudaLaunch_v3020_params;
37
+
38
+ typedef struct cudaLaunch_ptsz_v7000_params_st {
39
+ const void *func;
40
+ } cudaLaunch_ptsz_v7000_params;
41
+
42
+ typedef struct cudaStreamSetFlags_v10200_params_st {
43
+ cudaStream_t hStream;
44
+ unsigned int flags;
45
+ } cudaStreamSetFlags_v10200_params;
46
+
47
+ typedef struct cudaStreamSetFlags_ptsz_v10200_params_st {
48
+ cudaStream_t hStream;
49
+ unsigned int flags;
50
+ } cudaStreamSetFlags_ptsz_v10200_params;
51
+
52
+ typedef struct cudaProfilerInitialize_v4000_params_st {
53
+ const char *configFile;
54
+ const char *outputFile;
55
+ cudaOutputMode_t outputMode;
56
+ } cudaProfilerInitialize_v4000_params;
57
+
58
+ typedef struct cudaThreadSetLimit_v3020_params_st {
59
+ enum cudaLimit limit;
60
+ size_t value;
61
+ } cudaThreadSetLimit_v3020_params;
62
+
63
+ typedef struct cudaThreadGetLimit_v3020_params_st {
64
+ size_t *pValue;
65
+ enum cudaLimit limit;
66
+ } cudaThreadGetLimit_v3020_params;
67
+
68
+ typedef struct cudaThreadGetCacheConfig_v3020_params_st {
69
+ enum cudaFuncCache *pCacheConfig;
70
+ } cudaThreadGetCacheConfig_v3020_params;
71
+
72
+ typedef struct cudaThreadSetCacheConfig_v3020_params_st {
73
+ enum cudaFuncCache cacheConfig;
74
+ } cudaThreadSetCacheConfig_v3020_params;
75
+
76
+ typedef struct cudaSetDoubleForDevice_v3020_params_st {
77
+ double *d;
78
+ } cudaSetDoubleForDevice_v3020_params;
79
+
80
+ typedef struct cudaSetDoubleForHost_v3020_params_st {
81
+ double *d;
82
+ } cudaSetDoubleForHost_v3020_params;
83
+
84
+ typedef struct cudaCreateTextureObject_v2_v11080_params_st {
85
+ cudaTextureObject_t *pTexObject;
86
+ const struct cudaResourceDesc *pResDesc;
87
+ const struct cudaTextureDesc *pTexDesc;
88
+ const struct cudaResourceViewDesc *pResViewDesc;
89
+ } cudaCreateTextureObject_v2_v11080_params;
90
+
91
+ typedef struct cudaGetTextureObjectTextureDesc_v2_v11080_params_st {
92
+ struct cudaTextureDesc *pTexDesc;
93
+ cudaTextureObject_t texObject;
94
+ } cudaGetTextureObjectTextureDesc_v2_v11080_params;
95
+
96
+ typedef struct cudaBindTexture_v3020_params_st {
97
+ size_t *offset;
98
+ const struct textureReference *texref;
99
+ const void *devPtr;
100
+ const struct cudaChannelFormatDesc *desc;
101
+ size_t size __dv;
102
+ } cudaBindTexture_v3020_params;
103
+
104
+ typedef struct cudaBindTexture2D_v3020_params_st {
105
+ size_t *offset;
106
+ const struct textureReference *texref;
107
+ const void *devPtr;
108
+ const struct cudaChannelFormatDesc *desc;
109
+ size_t width;
110
+ size_t height;
111
+ size_t pitch;
112
+ } cudaBindTexture2D_v3020_params;
113
+
114
+ typedef struct cudaBindTextureToArray_v3020_params_st {
115
+ const struct textureReference *texref;
116
+ cudaArray_const_t array;
117
+ const struct cudaChannelFormatDesc *desc;
118
+ } cudaBindTextureToArray_v3020_params;
119
+
120
+ typedef struct cudaBindTextureToMipmappedArray_v5000_params_st {
121
+ const struct textureReference *texref;
122
+ cudaMipmappedArray_const_t mipmappedArray;
123
+ const struct cudaChannelFormatDesc *desc;
124
+ } cudaBindTextureToMipmappedArray_v5000_params;
125
+
126
+ typedef struct cudaUnbindTexture_v3020_params_st {
127
+ const struct textureReference *texref;
128
+ } cudaUnbindTexture_v3020_params;
129
+
130
+ typedef struct cudaGetTextureAlignmentOffset_v3020_params_st {
131
+ size_t *offset;
132
+ const struct textureReference *texref;
133
+ } cudaGetTextureAlignmentOffset_v3020_params;
134
+
135
+ typedef struct cudaGetTextureReference_v3020_params_st {
136
+ const struct textureReference **texref;
137
+ const void *symbol;
138
+ } cudaGetTextureReference_v3020_params;
139
+
140
+ typedef struct cudaBindSurfaceToArray_v3020_params_st {
141
+ const struct surfaceReference *surfref;
142
+ cudaArray_const_t array;
143
+ const struct cudaChannelFormatDesc *desc;
144
+ } cudaBindSurfaceToArray_v3020_params;
145
+
146
+ typedef struct cudaGetSurfaceReference_v3020_params_st {
147
+ const struct surfaceReference **surfref;
148
+ const void *symbol;
149
+ } cudaGetSurfaceReference_v3020_params;
150
+
151
+ typedef struct cudaGraphInstantiate_v10000_params_st {
152
+ cudaGraphExec_t *pGraphExec;
153
+ cudaGraph_t graph;
154
+ cudaGraphNode_t *pErrorNode;
155
+ char *pLogBuffer;
156
+ size_t bufferSize;
157
+ } cudaGraphInstantiate_v10000_params;
158
+
159
+ // Parameter trace structures for removed functions
160
+
161
+
162
+ // End of parameter trace structures
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/generated_nvtx_meta.h ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2013-2018 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
51
+ #pragma GCC visibility push(default)
52
+ #endif
53
+
54
+ // *************************************************************************
55
+ // Definitions of structs to hold parameters for each function
56
+ // *************************************************************************
57
+
58
+ typedef struct nvtxMarkEx_params_st {
59
+ const nvtxEventAttributes_t* eventAttrib;
60
+ } nvtxMarkEx_params;
61
+
62
+ typedef struct nvtxMarkA_params_st {
63
+ const char* message;
64
+ } nvtxMarkA_params;
65
+
66
+ typedef struct nvtxMarkW_params_st {
67
+ const wchar_t* message;
68
+ } nvtxMarkW_params;
69
+
70
+ typedef struct nvtxRangeStartEx_params_st {
71
+ const nvtxEventAttributes_t* eventAttrib;
72
+ } nvtxRangeStartEx_params;
73
+
74
+ typedef struct nvtxRangeStartA_params_st {
75
+ const char* message;
76
+ } nvtxRangeStartA_params;
77
+
78
+ typedef struct nvtxRangeStartW_params_st {
79
+ const wchar_t* message;
80
+ } nvtxRangeStartW_params;
81
+
82
+ typedef struct nvtxRangeEnd_params_st {
83
+ nvtxRangeId_t id;
84
+ } nvtxRangeEnd_params;
85
+
86
+ typedef struct nvtxRangePushEx_params_st {
87
+ const nvtxEventAttributes_t* eventAttrib;
88
+ } nvtxRangePushEx_params;
89
+
90
+ typedef struct nvtxRangePushA_params_st {
91
+ const char* message;
92
+ } nvtxRangePushA_params;
93
+
94
+ typedef struct nvtxRangePushW_params_st {
95
+ const wchar_t* message;
96
+ } nvtxRangePushW_params;
97
+
98
+ typedef struct nvtxRangePop_params_st {
99
+ /* WAR: Windows compiler doesn't allow empty structs */
100
+ /* This field shouldn't be used */
101
+ void *dummy;
102
+ } nvtxRangePop_params;
103
+
104
+ typedef struct nvtxNameCategoryA_params_st {
105
+ uint32_t category;
106
+ const char* name;
107
+ } nvtxNameCategoryA_params;
108
+
109
+ typedef struct nvtxNameCategoryW_params_st {
110
+ uint32_t category;
111
+ const wchar_t* name;
112
+ } nvtxNameCategoryW_params;
113
+
114
+ typedef struct nvtxNameOsThreadA_params_st {
115
+ uint32_t threadId;
116
+ const char* name;
117
+ } nvtxNameOsThreadA_params;
118
+
119
+ typedef struct nvtxNameOsThreadW_params_st {
120
+ uint32_t threadId;
121
+ const wchar_t* name;
122
+ } nvtxNameOsThreadW_params;
123
+
124
+ typedef struct nvtxNameCuDeviceA_params_st {
125
+ CUdevice device;
126
+ const char* name;
127
+ } nvtxNameCuDeviceA_params;
128
+
129
+ typedef struct nvtxNameCuDeviceW_params_st {
130
+ CUdevice device;
131
+ const wchar_t* name;
132
+ } nvtxNameCuDeviceW_params;
133
+
134
+ typedef struct nvtxNameCuContextA_params_st {
135
+ CUcontext context;
136
+ const char* name;
137
+ } nvtxNameCuContextA_params;
138
+
139
+ typedef struct nvtxNameCuContextW_params_st {
140
+ CUcontext context;
141
+ const wchar_t* name;
142
+ } nvtxNameCuContextW_params;
143
+
144
+ typedef struct nvtxNameCuStreamA_params_st {
145
+ CUstream stream;
146
+ const char* name;
147
+ } nvtxNameCuStreamA_params;
148
+
149
+ typedef struct nvtxNameCuStreamW_params_st {
150
+ CUstream stream;
151
+ const wchar_t* name;
152
+ } nvtxNameCuStreamW_params;
153
+
154
+ typedef struct nvtxNameCuEventA_params_st {
155
+ CUevent event;
156
+ const char* name;
157
+ } nvtxNameCuEventA_params;
158
+
159
+ typedef struct nvtxNameCuEventW_params_st {
160
+ CUevent event;
161
+ const wchar_t* name;
162
+ } nvtxNameCuEventW_params;
163
+
164
+ typedef struct nvtxNameCudaDeviceA_params_st {
165
+ int device;
166
+ const char* name;
167
+ } nvtxNameCudaDeviceA_params;
168
+
169
+ typedef struct nvtxNameCudaDeviceW_params_st {
170
+ int device;
171
+ const wchar_t* name;
172
+ } nvtxNameCudaDeviceW_params;
173
+
174
+ typedef struct nvtxNameCudaStreamA_params_st {
175
+ cudaStream_t stream;
176
+ const char* name;
177
+ } nvtxNameCudaStreamA_params;
178
+
179
+ typedef struct nvtxNameCudaStreamW_params_st {
180
+ cudaStream_t stream;
181
+ const wchar_t* name;
182
+ } nvtxNameCudaStreamW_params;
183
+
184
+ typedef struct nvtxNameCudaEventA_params_st {
185
+ cudaEvent_t event;
186
+ const char* name;
187
+ } nvtxNameCudaEventA_params;
188
+
189
+ typedef struct nvtxNameCudaEventW_params_st {
190
+ cudaEvent_t event;
191
+ const wchar_t* name;
192
+ } nvtxNameCudaEventW_params;
193
+
194
+ typedef struct nvtxDomainCreateA_params_st {
195
+ const char* name;
196
+ } nvtxDomainCreateA_params;
197
+
198
+ typedef struct nvtxDomainDestroy_params_st {
199
+ nvtxDomainHandle_t domain;
200
+ } nvtxDomainDestroy_params;
201
+
202
+ typedef struct nvtxDomainMarkEx_params_st {
203
+ nvtxDomainHandle_t domain;
204
+ nvtxMarkEx_params core;
205
+ } nvtxDomainMarkEx_params;
206
+
207
+ typedef struct nvtxDomainRangeStartEx_params_st {
208
+ nvtxDomainHandle_t domain;
209
+ nvtxRangeStartEx_params core;
210
+ } nvtxDomainRangeStartEx_params;
211
+
212
+ typedef struct nvtxDomainRangeEnd_params_st {
213
+ nvtxDomainHandle_t domain;
214
+ nvtxRangeEnd_params core;
215
+ } nvtxDomainRangeEnd_params;
216
+
217
+ typedef struct nvtxDomainRangePushEx_params_st {
218
+ nvtxDomainHandle_t domain;
219
+ nvtxRangePushEx_params core;
220
+ } nvtxDomainRangePushEx_params;
221
+
222
+ typedef struct nvtxDomainRangePop_params_st {
223
+ nvtxDomainHandle_t domain;
224
+ } nvtxDomainRangePop_params;
225
+
226
+ typedef struct nvtxSyncUserCreate_params_st {
227
+ nvtxDomainHandle_t domain;
228
+ const nvtxSyncUserAttributes_t* attribs;
229
+ } nvtxSyncUserCreate_params;
230
+
231
+ typedef struct nvtxSyncUserCommon_params_st {
232
+ nvtxSyncUser_t handle;
233
+ } nvtxSyncUserCommon_params;
234
+
235
+ typedef struct nvtxDomainRegisterStringA_params_st {
236
+ nvtxDomainHandle_t domain;
237
+ const char* string;
238
+ } nvtxDomainRegisterStringA_params;
239
+
240
+ typedef struct nvtxDomainRegisterStringW_params_st { /* Holds the domain handle and string parameters; per its name, for nvtxDomainRegisterStringW. */
241
+ nvtxDomainHandle_t domain;
242
+ const char* string; /* NOTE(review): declared as a narrow char pointer, whereas every other *W_params struct in this header uses const wchar_t* -- confirm against the nvtxDomainRegisterStringW prototype before reading this as a narrow string. */
243
+ } nvtxDomainRegisterStringW_params;
244
+
245
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
246
+ #pragma GCC visibility pop
247
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_common.h ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_COMMON_H
2
+ #define NVPERF_COMMON_H
3
+
4
+ /*
5
+ * Copyright 2014-2024 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+
44
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
45
+ #pragma GCC visibility push(default)
46
+ #if !defined(NVPW_LOCAL)
47
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
48
+ #endif
49
+ #else
50
+ #if !defined(NVPW_LOCAL)
51
+ #define NVPW_LOCAL
52
+ #endif
53
+ #endif
54
+
55
+ #ifdef __cplusplus
56
+ extern "C" {
57
+ #endif
58
+
59
+ /**
60
+ * @file nvperf_common.h
61
+ */
62
+
63
+ #ifndef NVPERF_NVPA_STATUS_DEFINED
64
+ #define NVPERF_NVPA_STATUS_DEFINED
65
+
66
+ /// Error codes.
67
+ typedef enum NVPA_Status
68
+ {
69
+ /// Success
70
+ NVPA_STATUS_SUCCESS = 0,
71
+ /// Generic error.
72
+ NVPA_STATUS_ERROR = 1,
73
+ /// Internal error. Please file a bug!
74
+ NVPA_STATUS_INTERNAL_ERROR = 2,
75
+ /// NVPW_InitializeTarget() or NVPW_InitializeHost() has not been called yet.
76
+ NVPA_STATUS_NOT_INITIALIZED = 3,
77
+ /// The NvPerf DLL/DSO could not be loaded during NVPW_Initialize*(). Please ensure they are placed in the
78
+ /// appropriate location that can be found by a dynamic linker. And on Linux systems, confirm that the
79
+ /// LD_LIBRARY_PATH environment variable is set correctly. Alternatively, you may utilize
80
+ /// NVPW_SetLibraryLoadPaths() to define additional library search paths.
81
+ NVPA_STATUS_NOT_LOADED = 4,
82
+ /// The function was not found in this version of the NvPerf DLL/DSO. Or if you are directly calling
83
+ /// NVPA_GetProcAddress(), please ensure the function name is spelled correctly.
84
+ NVPA_STATUS_FUNCTION_NOT_FOUND = 5,
85
+ /// The request was intentionally not supported.
86
+ NVPA_STATUS_NOT_SUPPORTED = 6,
87
+ /// The request was not implemented by this version.
88
+ NVPA_STATUS_NOT_IMPLEMENTED = 7,
89
+ /// Invalid argument.
90
+ NVPA_STATUS_INVALID_ARGUMENT = 8,
91
+ /// UNUSED
92
+ NVPA_STATUS_INVALID_METRIC_ID = 9,
93
+ /// No driver has been loaded via NVPW_*_LoadDriver().
94
+ NVPA_STATUS_DRIVER_NOT_LOADED = 10,
95
+ /// Failed memory allocation.
96
+ NVPA_STATUS_OUT_OF_MEMORY = 11,
97
+ /// UNUSED
98
+ NVPA_STATUS_INVALID_THREAD_STATE = 12,
99
+ /// UNUSED
100
+ NVPA_STATUS_FAILED_CONTEXT_ALLOC = 13,
101
+ /// The specified GPU is not supported. It is recommended to call IsGpuSupported() for more information.
102
+ NVPA_STATUS_UNSUPPORTED_GPU = 14,
103
+ /// The installed NVIDIA driver is too old.
104
+ NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION = 15,
105
+ /// UNUSED
106
+ NVPA_STATUS_OBJECT_NOT_REGISTERED = 16,
107
+ /// Profiling permission not granted; see https://developer.nvidia.com/nvidia-development-tools-solutions-
108
+ /// ERR_NVGPUCTRPERM-permission-issue-performance-counters
109
+ NVPA_STATUS_INSUFFICIENT_PRIVILEGE = 17,
110
+ /// UNUSED
111
+ NVPA_STATUS_INVALID_CONTEXT_STATE = 18,
112
+ /// UNUSED
113
+ NVPA_STATUS_INVALID_OBJECT_STATE = 19,
114
+ /// The request could not be fulfilled because a system resource is already in use.
115
+ NVPA_STATUS_RESOURCE_UNAVAILABLE = 20,
116
+ /// UNUSED
117
+ NVPA_STATUS_DRIVER_LOADED_TOO_LATE = 21,
118
+ /// The provided buffer is not large enough.
119
+ NVPA_STATUS_INSUFFICIENT_SPACE = 22,
120
+ /// UNUSED
121
+ NVPA_STATUS_OBJECT_MISMATCH = 23,
122
+ /// Virtualized GPU (vGPU) is not supported.
123
+ NVPA_STATUS_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 24,
124
+ /// Profiling permission was not granted or the device was disabled.
125
+ NVPA_STATUS_PROFILING_NOT_ALLOWED = 25,
126
+ NVPA_STATUS__COUNT
127
+ } NVPA_Status;
128
+
129
+
130
+ inline void NVPW_NVPAStatusToString(NVPA_Status status, const char** ppStatusStr, const char** ppCommentStr)
131
+ {
132
+ switch (status)
133
+ {
134
+ case NVPA_STATUS_SUCCESS:
135
+ *ppStatusStr = "NVPA_STATUS_SUCCESS";
136
+ *ppCommentStr = "Success";
137
+ return;
138
+ case NVPA_STATUS_ERROR:
139
+ *ppStatusStr = "NVPA_STATUS_ERROR";
140
+ *ppCommentStr = "Generic error.";
141
+ return;
142
+ case NVPA_STATUS_INTERNAL_ERROR:
143
+ *ppStatusStr = "NVPA_STATUS_INTERNAL_ERROR";
144
+ *ppCommentStr = "Internal error. Please file a bug!";
145
+ return;
146
+ case NVPA_STATUS_NOT_INITIALIZED:
147
+ *ppStatusStr = "NVPA_STATUS_NOT_INITIALIZED";
148
+ *ppCommentStr = "NVPW_InitializeTarget() or NVPW_InitializeHost() has not been called yet.";
149
+ return;
150
+ case NVPA_STATUS_NOT_LOADED:
151
+ *ppStatusStr = "NVPA_STATUS_NOT_LOADED";
152
+ *ppCommentStr = "The NvPerf DLL/DSO could not be loaded during NVPW_Initialize*(). Please ensure they are placed in the appropriate location that can be founder by a dynamic linker. And on Linux systems, confirm that the LD_LIBRARY_PATH environment variable is set correctly. Alternatively, you may utilize NVPW_SetLibraryLoadPaths() to define additional library search paths.";
153
+ return;
154
+ case NVPA_STATUS_FUNCTION_NOT_FOUND:
155
+ *ppStatusStr = "NVPA_STATUS_FUNCTION_NOT_FOUND";
156
+ *ppCommentStr = "The function was not found in this version of the NvPerf DLL/DSO. Or if you are directly calling NVPA_GetProcAddress(), please ensure the function name is spelled correctly.";
157
+ return;
158
+ case NVPA_STATUS_NOT_SUPPORTED:
159
+ *ppStatusStr = "NVPA_STATUS_NOT_SUPPORTED";
160
+ *ppCommentStr = "The request was intentionally not supported.";
161
+ return;
162
+ case NVPA_STATUS_NOT_IMPLEMENTED:
163
+ *ppStatusStr = "NVPA_STATUS_NOT_IMPLEMENTED";
164
+ *ppCommentStr = "The request was not implemented by this version.";
165
+ return;
166
+ case NVPA_STATUS_INVALID_ARGUMENT:
167
+ *ppStatusStr = "NVPA_STATUS_INVALID_ARGUMENT";
168
+ *ppCommentStr = "Invalid argument.";
169
+ return;
170
+ case NVPA_STATUS_INVALID_METRIC_ID:
171
+ *ppStatusStr = "NVPA_STATUS_INVALID_METRIC_ID";
172
+ *ppCommentStr = "UNUSED";
173
+ return;
174
+ case NVPA_STATUS_DRIVER_NOT_LOADED:
175
+ *ppStatusStr = "NVPA_STATUS_DRIVER_NOT_LOADED";
176
+ *ppCommentStr = "No driver has been loaded via NVPW_*_LoadDriver().";
177
+ return;
178
+ case NVPA_STATUS_OUT_OF_MEMORY:
179
+ *ppStatusStr = "NVPA_STATUS_OUT_OF_MEMORY";
180
+ *ppCommentStr = "Failed memory allocation.";
181
+ return;
182
+ case NVPA_STATUS_INVALID_THREAD_STATE:
183
+ *ppStatusStr = "NVPA_STATUS_INVALID_THREAD_STATE";
184
+ *ppCommentStr = "UNUSED";
185
+ return;
186
+ case NVPA_STATUS_FAILED_CONTEXT_ALLOC:
187
+ *ppStatusStr = "NVPA_STATUS_FAILED_CONTEXT_ALLOC";
188
+ *ppCommentStr = "UNUSED";
189
+ return;
190
+ case NVPA_STATUS_UNSUPPORTED_GPU:
191
+ *ppStatusStr = "NVPA_STATUS_UNSUPPORTED_GPU";
192
+ *ppCommentStr = "The specified GPU is not supported. It is recommended to call IsGpuSupported() for more information";
193
+ return;
194
+ case NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION:
195
+ *ppStatusStr = "NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION";
196
+ *ppCommentStr = "The installed NVIDIA driver is too old.";
197
+ return;
198
+ case NVPA_STATUS_OBJECT_NOT_REGISTERED:
199
+ *ppStatusStr = "NVPA_STATUS_OBJECT_NOT_REGISTERED";
200
+ *ppCommentStr = "UNUSED";
201
+ return;
202
+ case NVPA_STATUS_INSUFFICIENT_PRIVILEGE:
203
+ *ppStatusStr = "NVPA_STATUS_INSUFFICIENT_PRIVILEGE";
204
+ *ppCommentStr = "Profiling permission not granted; see https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters";
205
+ return;
206
+ case NVPA_STATUS_INVALID_CONTEXT_STATE:
207
+ *ppStatusStr = "NVPA_STATUS_INVALID_CONTEXT_STATE";
208
+ *ppCommentStr = "UNUSED";
209
+ return;
210
+ case NVPA_STATUS_INVALID_OBJECT_STATE:
211
+ *ppStatusStr = "NVPA_STATUS_INVALID_OBJECT_STATE";
212
+ *ppCommentStr = "UNUSED";
213
+ return;
214
+ case NVPA_STATUS_RESOURCE_UNAVAILABLE:
215
+ *ppStatusStr = "NVPA_STATUS_RESOURCE_UNAVAILABLE";
216
+ *ppCommentStr = "The request could not be fulfilled because a system resource is already in use.";
217
+ return;
218
+ case NVPA_STATUS_DRIVER_LOADED_TOO_LATE:
219
+ *ppStatusStr = "NVPA_STATUS_DRIVER_LOADED_TOO_LATE";
220
+ *ppCommentStr = "UNUSED";
221
+ return;
222
+ case NVPA_STATUS_INSUFFICIENT_SPACE:
223
+ *ppStatusStr = "NVPA_STATUS_INSUFFICIENT_SPACE";
224
+ *ppCommentStr = "The provided buffer is not large enough.";
225
+ return;
226
+ case NVPA_STATUS_OBJECT_MISMATCH:
227
+ *ppStatusStr = "NVPA_STATUS_OBJECT_MISMATCH";
228
+ *ppCommentStr = "UNUSED";
229
+ return;
230
+ case NVPA_STATUS_VIRTUALIZED_DEVICE_NOT_SUPPORTED:
231
+ *ppStatusStr = "NVPA_STATUS_VIRTUALIZED_DEVICE_NOT_SUPPORTED";
232
+ *ppCommentStr = "Virtualized GPU (vGPU) is not supported.";
233
+ return;
234
+ case NVPA_STATUS_PROFILING_NOT_ALLOWED:
235
+ *ppStatusStr = "NVPA_STATUS_PROFILING_NOT_ALLOWED";
236
+ *ppCommentStr = "Profiling permission was not granted or the device was disabled.";
237
+ return;
238
+ default:
239
+ *ppStatusStr = "Unrecognized status";
240
+ *ppCommentStr = "This status is unrecognized. Is it coming from a newer version of NvPerf library?";
241
+ return;
242
+ }
243
+ }
244
+
245
+
246
+ #endif // NVPERF_NVPA_STATUS_DEFINED
247
+
248
+
249
+ #ifndef NVPERF_NVPA_ACTIVITY_KIND_DEFINED
250
+ #define NVPERF_NVPA_ACTIVITY_KIND_DEFINED
251
+
252
+ /// The configuration's activity-kind dictates which types of data may be collected.
253
+ typedef enum NVPA_ActivityKind
254
+ {
255
+ /// Invalid value.
256
+ NVPA_ACTIVITY_KIND_INVALID = 0,
257
+ /// A workload-centric activity for serialized and pipelined collection.
258
+ ///
259
+ /// Profiler is capable of collecting both serialized and pipelined metrics. The library introduces any
260
+ /// synchronization required to collect serialized metrics.
261
+ NVPA_ACTIVITY_KIND_PROFILER,
262
+ /// A realtime activity for sampling counters from the CPU or GPU.
263
+ NVPA_ACTIVITY_KIND_REALTIME_SAMPLED,
264
+ /// A realtime activity for profiling counters from the CPU or GPU without CPU/GPU synchronizations.
265
+ NVPA_ACTIVITY_KIND_REALTIME_PROFILER,
266
+ NVPA_ACTIVITY_KIND__COUNT
267
+ } NVPA_ActivityKind;
268
+
269
+
270
+ #endif // NVPERF_NVPA_ACTIVITY_KIND_DEFINED
271
+
272
+
273
+ #ifndef NVPERF_NVPA_BOOL_DEFINED
274
+ #define NVPERF_NVPA_BOOL_DEFINED
275
+ /// The type used for boolean values.
276
+ typedef uint8_t NVPA_Bool;
277
+ #endif // NVPERF_NVPA_BOOL_DEFINED
278
+
279
+ #ifndef NVPA_STRUCT_SIZE
280
+ #define NVPA_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
281
+ #endif // NVPA_STRUCT_SIZE
282
+
283
+ #ifndef NVPW_FIELD_EXISTS
284
+ #define NVPW_FIELD_EXISTS(pParams_, name_) \
285
+ ((pParams_)->structSize >= (size_t)((const uint8_t*)(&(pParams_)->name_) + sizeof(pParams_)->name_ - (const uint8_t*)(pParams_)))
286
+ #endif // NVPW_FIELD_EXISTS
287
+
288
+
289
+ #ifndef NVPERF_NVPA_GETPROCADDRESS_DEFINED
290
+ #define NVPERF_NVPA_GETPROCADDRESS_DEFINED
291
+
292
+ typedef NVPA_Status (*NVPA_GenericFn)(void);
293
+
294
+
295
+ ///
296
+ /// Gets the address of an NvPerf API function.
297
+ ///
298
+ /// \return A function pointer to the function, or NULL if the function is not available.
299
+ ///
300
+ /// \param pFunctionName [in] Name of the function to retrieve.
301
+ NVPA_GenericFn NVPA_GetProcAddress(const char* pFunctionName);
302
+
303
+ #endif
304
+
305
+ #ifndef NVPERF_NVPW_SETLIBRARYLOADPATHS_DEFINED
306
+ #define NVPERF_NVPW_SETLIBRARYLOADPATHS_DEFINED
307
+
308
+
309
+ typedef struct NVPW_SetLibraryLoadPaths_Params
310
+ {
311
+ /// [in]
312
+ size_t structSize;
313
+ /// [in] assign to NULL
314
+ void* pPriv;
315
+ /// [in] number of paths in ppPaths
316
+ size_t numPaths;
317
+ /// [in] array of null-terminated paths
318
+ const char** ppPaths;
319
+ } NVPW_SetLibraryLoadPaths_Params;
320
+ #define NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_SetLibraryLoadPaths_Params, ppPaths)
321
+
322
+ /// Sets library search path for \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget().
323
+ /// \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget() load the NvPerf DLL/DSO. This function sets
324
+ /// ordered paths that will be searched with the LoadLibrary() or dlopen() call.
325
+ /// If load paths are set by this function, the default set of load paths
326
+ /// will not be attempted.
327
+ /// Each path must point at a directory (not a file name).
328
+ /// This function is not thread-safe.
329
+ /// Example Usage:
330
+ /// \code
331
+ /// const char* paths[] = {
332
+ /// "path1", "path2", etc
333
+ /// };
334
+ /// NVPW_SetLibraryLoadPaths_Params params{NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE};
335
+ /// params.numPaths = sizeof(paths)/sizeof(paths[0]);
336
+ /// params.ppPaths = paths;
337
+ /// NVPW_SetLibraryLoadPaths(&params);
338
+ /// NVPW_InitializeHost();
339
+ /// params.numPaths = 0;
340
+ /// params.ppPaths = NULL;
341
+ /// NVPW_SetLibraryLoadPaths(&params);
342
+ /// \endcode
343
+ NVPA_Status NVPW_SetLibraryLoadPaths(NVPW_SetLibraryLoadPaths_Params* pParams);
344
+
345
+ typedef struct NVPW_SetLibraryLoadPathsW_Params
346
+ {
347
+ /// [in]
348
+ size_t structSize;
349
+ /// [in] assign to NULL
350
+ void* pPriv;
351
+ /// [in] number of paths in ppwPaths
352
+ size_t numPaths;
353
+ /// [in] array of null-terminated paths
354
+ const wchar_t** ppwPaths;
355
+ } NVPW_SetLibraryLoadPathsW_Params;
356
+ #define NVPW_SetLibraryLoadPathsW_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_SetLibraryLoadPathsW_Params, ppwPaths)
357
+
358
+ /// Sets library search path for \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget().
359
+ /// \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget() load the NvPerf DLL/DSO. This function sets
360
+ /// ordered paths that will be searched with the LoadLibrary() or dlopen() call.
361
+ /// If load paths are set by this function, the default set of load paths
362
+ /// will not be attempted.
363
+ /// Each path must point at a directory (not a file name).
364
+ /// This function is not thread-safe.
365
+ /// Example Usage:
366
+ /// \code
367
+ /// const wchar_t* wpaths[] = {
368
+ /// L"path1", L"path2", etc
369
+ /// };
370
+ /// NVPW_SetLibraryLoadPathsW_Params params{NVPW_SetLibraryLoadPathsW_Params_STRUCT_SIZE};
371
+ /// params.numPaths = sizeof(wpaths)/sizeof(wpaths[0]);
372
+ /// params.ppwPaths = wpaths;
373
+ /// NVPW_SetLibraryLoadPathsW(&params);
374
+ /// NVPW_InitializeHost();
375
+ /// params.numPaths = 0;
376
+ /// params.ppwPaths = NULL;
377
+ /// NVPW_SetLibraryLoadPathsW(&params);
378
+ /// \endcode
379
+ NVPA_Status NVPW_SetLibraryLoadPathsW(NVPW_SetLibraryLoadPathsW_Params* pParams);
380
+
381
+ #endif
382
+
383
+
384
+
385
+ #ifdef __cplusplus
386
+ } // extern "C"
387
+ #endif
388
+
389
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
390
+ #pragma GCC visibility pop
391
+ #endif
392
+
393
+ #endif // NVPERF_COMMON_H
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_cuda_host.h ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_CUDA_HOST_H
2
+ #define NVPERF_CUDA_HOST_H
3
+
4
+ /*
5
+ * Copyright 2014-2024 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+ #include "nvperf_host.h"
45
+
46
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
47
+ #pragma GCC visibility push(default)
48
+ #if !defined(NVPW_LOCAL)
49
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
50
+ #endif
51
+ #else
52
+ #if !defined(NVPW_LOCAL)
53
+ #define NVPW_LOCAL
54
+ #endif
55
+ #endif
56
+
57
+ #ifdef __cplusplus
58
+ extern "C" {
59
+ #endif
60
+
61
+ /**
62
+ * @file nvperf_cuda_host.h
63
+ */
64
+
65
+ typedef struct NVPW_CUDA_RawMetricsConfig_Create_Params
66
+ {
67
+ /// [in]
68
+ size_t structSize;
69
+ /// [in] assign to NULL
70
+ void* pPriv;
71
+ /// [in]
72
+ NVPA_ActivityKind activityKind;
73
+ /// [in]
74
+ const char* pChipName;
75
+ /// [out] new NVPA_RawMetricsConfig object
76
+ struct NVPA_RawMetricsConfig* pRawMetricsConfig;
77
+ } NVPW_CUDA_RawMetricsConfig_Create_Params;
78
+ #define NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_Params, pRawMetricsConfig)
79
+
80
+ NVPA_Status NVPW_CUDA_RawMetricsConfig_Create(NVPW_CUDA_RawMetricsConfig_Create_Params* pParams);
81
+
82
+ typedef struct NVPW_CUDA_RawMetricsConfig_Create_V2_Params
83
+ {
84
+ /// [in]
85
+ size_t structSize;
86
+ /// [in] assign to NULL
87
+ void* pPriv;
88
+ /// [in]
89
+ NVPA_ActivityKind activityKind;
90
+ /// [in] accepted for chips supported at the time-of-release.
91
+ const char* pChipName;
92
+ /// [in] buffer with counter availability image - required for future chip support
93
+ const uint8_t* pCounterAvailabilityImage;
94
+ /// [out] new NVPA_RawMetricsConfig object
95
+ struct NVPA_RawMetricsConfig* pRawMetricsConfig;
96
+ } NVPW_CUDA_RawMetricsConfig_Create_V2_Params;
97
+ #define NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_V2_Params, pRawMetricsConfig)
98
+
99
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
100
+ NVPA_Status NVPW_CUDA_RawMetricsConfig_Create_V2(NVPW_CUDA_RawMetricsConfig_Create_V2_Params* pParams);
101
+
102
+ typedef struct NVPW_CUDA_CounterDataBuilder_Create_Params
103
+ {
104
+ /// [in]
105
+ size_t structSize;
106
+ /// [in] assign to NULL
107
+ void* pPriv;
108
+ /// [in] accepted for chips supported at the time-of-release.
109
+ const char* pChipName;
110
+ /// [in] buffer with counter availability image - required for future chip support
111
+ const uint8_t* pCounterAvailabilityImage;
112
+ /// [out] new NVPA_CounterDataBuilder object
113
+ struct NVPA_CounterDataBuilder* pCounterDataBuilder;
114
+ } NVPW_CUDA_CounterDataBuilder_Create_Params;
115
+ #define NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_CounterDataBuilder_Create_Params, pCounterDataBuilder)
116
+
117
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
118
+ NVPA_Status NVPW_CUDA_CounterDataBuilder_Create(NVPW_CUDA_CounterDataBuilder_Create_Params* pParams);
119
+
120
+ typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator;
121
+
122
+ typedef struct NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params
123
+ {
124
+ /// [in]
125
+ size_t structSize;
126
+ /// [in] assign to NULL
127
+ void* pPriv;
128
+ /// [in] accepted for chips supported at the time-of-release.
129
+ const char* pChipName;
130
+ /// [in] buffer with counter availability image - required for future chip support
131
+ const uint8_t* pCounterAvailabilityImage;
132
+ /// [out]
133
+ size_t scratchBufferSize;
134
+ } NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params;
135
+ #define NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params, scratchBufferSize)
136
+
137
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
138
+ NVPA_Status NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params* pParams);
139
+
140
+ typedef struct NVPW_CUDA_MetricsEvaluator_Initialize_Params
141
+ {
142
+ /// [in]
143
+ size_t structSize;
144
+ /// [in] assign to NULL
145
+ void* pPriv;
146
+ /// [in]
147
+ uint8_t* pScratchBuffer;
148
+ /// [in] the size of the 'pScratchBuffer' array, should be at least the size of the 'scratchBufferSize' returned
149
+ /// by 'NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize'
150
+ size_t scratchBufferSize;
151
+ /// [in] accepted for chips supported at the time-of-release.
152
+ const char* pChipName;
153
+ /// [in] buffer with counter availability image - required for future chip support
154
+ const uint8_t* pCounterAvailabilityImage;
155
+ /// [in]
156
+ const uint8_t* pCounterDataImage;
157
+ /// [in] must be provided if 'pCounterDataImage' is not NULL
158
+ size_t counterDataImageSize;
159
+ /// [out]
160
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
161
+ } NVPW_CUDA_MetricsEvaluator_Initialize_Params;
162
+ #define NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_Initialize_Params, pMetricsEvaluator)
163
+
164
+ /// Use one of 'pChipName', 'pCounterAvailabilityImage', or 'pCounterDataImage'. 'pChipName' or
165
+ /// 'pCounterAvailabilityImage' will create a metrics evaluator based on a virtual device while 'pCounterDataImage'
166
+ /// will create a metrics evaluator based on the actual device.
167
+ NVPA_Status NVPW_CUDA_MetricsEvaluator_Initialize(NVPW_CUDA_MetricsEvaluator_Initialize_Params* pParams);
168
+
169
+
170
+
171
+ #ifdef __cplusplus
172
+ } // extern "C"
173
+ #endif
174
+
175
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
176
+ #pragma GCC visibility pop
177
+ #endif
178
+
179
+ #endif // NVPERF_CUDA_HOST_H
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_host.h ADDED
@@ -0,0 +1,1178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_HOST_H
2
+ #define NVPERF_HOST_H
3
+
4
+ /*
5
+ * Copyright 2014-2024 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+
45
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
46
+ #pragma GCC visibility push(default)
47
+ #if !defined(NVPW_LOCAL)
48
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
49
+ #endif
50
+ #else
51
+ #if !defined(NVPW_LOCAL)
52
+ #define NVPW_LOCAL
53
+ #endif
54
+ #endif
55
+
56
+ #ifdef __cplusplus
57
+ extern "C" {
58
+ #endif
59
+
60
+ /**
61
+ * @file nvperf_host.h
62
+ */
63
+
64
+
65
+ // Guard against multiple definition of NvPerf host types
66
+ #ifndef NVPERF_HOST_API_DEFINED
67
+ #define NVPERF_HOST_API_DEFINED
68
+
69
+
70
+ /***************************************************************************//**
71
+ * @name Host Configuration
72
+ * @{
73
+ */
74
+
75
+ typedef struct NVPW_InitializeHost_Params // Parameter block for NVPW_InitializeHost().
76
+ {
77
+ /// [in] presumably set to NVPW_InitializeHost_Params_STRUCT_SIZE (defined below)
78
+ size_t structSize;
79
+ /// [in] assign to NULL
80
+ void* pPriv;
81
+ } NVPW_InitializeHost_Params;
82
+ #define NVPW_InitializeHost_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_InitializeHost_Params, pPriv)
83
+
84
+ /// Load the host library.
85
+ NVPA_Status NVPW_InitializeHost(NVPW_InitializeHost_Params* pParams);
86
+
87
+ typedef struct NVPW_CounterData_CalculateCounterDataImageCopySize_Params // Parameter block for NVPW_CounterData_CalculateCounterDataImageCopySize().
88
+ {
89
+ /// [in] presumably set to NVPW_CounterData_CalculateCounterDataImageCopySize_Params_STRUCT_SIZE (defined below)
90
+ size_t structSize;
91
+ /// [in] assign to NULL
92
+ void* pPriv;
93
+ /// [in] The CounterDataPrefix generated from e.g. nvperf2 initdata or
94
+ /// NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).
95
+ const uint8_t* pCounterDataPrefix;
96
+ size_t counterDataPrefixSize; // NOTE(review): undocumented; presumably the byte size of 'pCounterDataPrefix' - confirm
97
+ /// max number of ranges that can be profiled
98
+ uint32_t maxNumRanges;
99
+ /// max number of RangeTree nodes; must be >= maxNumRanges
100
+ uint32_t maxNumRangeTreeNodes;
101
+ /// max string length of each RangeName, including the trailing NUL character
102
+ uint32_t maxRangeNameLength;
103
+ const uint8_t* pCounterDataSrc; // NOTE(review): undocumented; presumably the counter data image to be copied - confirm
104
+ /// [out] required size of the copy buffer
105
+ size_t copyDataImageCounterSize;
106
+ } NVPW_CounterData_CalculateCounterDataImageCopySize_Params;
107
+ #define NVPW_CounterData_CalculateCounterDataImageCopySize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_CalculateCounterDataImageCopySize_Params, copyDataImageCounterSize)
108
+
109
+ NVPA_Status NVPW_CounterData_CalculateCounterDataImageCopySize(NVPW_CounterData_CalculateCounterDataImageCopySize_Params* pParams);
110
+
111
+ typedef struct NVPW_CounterData_InitializeCounterDataImageCopy_Params // Parameter block for NVPW_CounterData_InitializeCounterDataImageCopy().
112
+ {
113
+ /// [in] presumably set to NVPW_CounterData_InitializeCounterDataImageCopy_Params_STRUCT_SIZE (defined below)
114
+ size_t structSize;
115
+ /// [in] assign to NULL
116
+ void* pPriv;
117
+ /// [in] The CounterDataPrefix generated from e.g. nvperf2 initdata or
118
+ /// NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).
119
+ const uint8_t* pCounterDataPrefix;
120
+ size_t counterDataPrefixSize; // NOTE(review): undocumented; presumably the byte size of 'pCounterDataPrefix' - confirm
121
+ /// max number of ranges that can be profiled
122
+ uint32_t maxNumRanges;
123
+ /// max number of RangeTree nodes; must be >= maxNumRanges
124
+ uint32_t maxNumRangeTreeNodes;
125
+ /// max string length of each RangeName, including the trailing NUL character
126
+ uint32_t maxRangeNameLength;
127
+ const uint8_t* pCounterDataSrc; // NOTE(review): undocumented; presumably the counter data image to copy from - confirm
128
+ uint8_t* pCounterDataDst; // NOTE(review): undocumented; presumably the destination buffer, sized via NVPW_CounterData_CalculateCounterDataImageCopySize() - confirm
129
+ } NVPW_CounterData_InitializeCounterDataImageCopy_Params;
130
+ #define NVPW_CounterData_InitializeCounterDataImageCopy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_InitializeCounterDataImageCopy_Params, pCounterDataDst)
131
+
132
+ NVPA_Status NVPW_CounterData_InitializeCounterDataImageCopy(NVPW_CounterData_InitializeCounterDataImageCopy_Params* pParams);
133
+
134
+ typedef struct NVPW_CounterData_ExtractCounterDataPrefix_Params // Parameter block for NVPW_CounterData_ExtractCounterDataPrefix().
135
+ {
136
+ /// [in] presumably set to NVPW_CounterData_ExtractCounterDataPrefix_Params_STRUCT_SIZE (defined below)
137
+ size_t structSize;
138
+ /// [in] assign to NULL
139
+ void* pPriv;
140
+ /// [in] The source buffer to extract the prefix from.
141
+ const uint8_t* pCounterDataSrc;
142
+ size_t counterDataSrcSize; // NOTE(review): undocumented; presumably the byte size of 'pCounterDataSrc' - confirm
143
+ /// [in] If not NULL, the prefix will be copied into this buffer.
144
+ uint8_t* pCounterDataPrefix;
145
+ /// [inout] if 'pCounterDataPrefix' is NULL, size of counter data prefix will be returned; otherwise it should
146
+ /// be set to the size of buffer allocated for 'pCounterDataPrefix'.
147
+ size_t counterDataPrefixSize;
148
+ } NVPW_CounterData_ExtractCounterDataPrefix_Params;
149
+ #define NVPW_CounterData_ExtractCounterDataPrefix_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_ExtractCounterDataPrefix_Params, counterDataPrefixSize)
150
+
151
+ NVPA_Status NVPW_CounterData_ExtractCounterDataPrefix(NVPW_CounterData_ExtractCounterDataPrefix_Params* pParams);
152
+
153
+ typedef struct NVPA_CounterDataCombiner NVPA_CounterDataCombiner; // Forward declaration only; no struct body appears in the visible part of this header.
154
+
155
+ typedef struct NVPW_CounterDataCombiner_Create_Params // Parameter block for NVPW_CounterDataCombiner_Create().
156
+ {
157
+ /// [in] presumably set to NVPW_CounterDataCombiner_Create_Params_STRUCT_SIZE (defined below)
158
+ size_t structSize;
159
+ /// [in] assign to NULL
160
+ void* pPriv;
161
+ /// The destination counter data into which the source datas will be combined
162
+ uint8_t* pCounterDataDst;
163
+ /// [out] The created counter data combiner
164
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
165
+ } NVPW_CounterDataCombiner_Create_Params;
166
+ #define NVPW_CounterDataCombiner_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_Create_Params, pCounterDataCombiner)
167
+
168
+ NVPA_Status NVPW_CounterDataCombiner_Create(NVPW_CounterDataCombiner_Create_Params* pParams);
169
+
170
+ typedef struct NVPW_CounterDataCombiner_Destroy_Params // Parameter block for NVPW_CounterDataCombiner_Destroy().
171
+ {
172
+ /// [in] presumably set to NVPW_CounterDataCombiner_Destroy_Params_STRUCT_SIZE (defined below)
173
+ size_t structSize;
174
+ /// [in] assign to NULL
175
+ void* pPriv;
176
+ NVPA_CounterDataCombiner* pCounterDataCombiner; // NOTE(review): no direction tag; presumably [in], the combiner to destroy - confirm
177
+ } NVPW_CounterDataCombiner_Destroy_Params;
178
+ #define NVPW_CounterDataCombiner_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_Destroy_Params, pCounterDataCombiner)
179
+
180
+ NVPA_Status NVPW_CounterDataCombiner_Destroy(NVPW_CounterDataCombiner_Destroy_Params* pParams);
181
+
182
+ typedef struct NVPW_CounterDataCombiner_CreateRange_Params // Parameter block for NVPW_CounterDataCombiner_CreateRange().
183
+ {
184
+ /// [in] presumably set to NVPW_CounterDataCombiner_CreateRange_Params_STRUCT_SIZE (defined below)
185
+ size_t structSize;
186
+ /// [in] assign to NULL
187
+ void* pPriv;
188
+ NVPA_CounterDataCombiner* pCounterDataCombiner; // NOTE(review): no direction tag; presumably [in] - confirm
189
+ size_t numDescriptions; // NOTE(review): undocumented; presumably the element count of 'ppDescriptions' - confirm
190
+ const char* const* ppDescriptions; // NOTE(review): undocumented; read-only array of C strings per its const qualifiers
191
+ /// [out]
192
+ size_t rangeIndexDst;
193
+ } NVPW_CounterDataCombiner_CreateRange_Params;
194
+ #define NVPW_CounterDataCombiner_CreateRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_CreateRange_Params, rangeIndexDst)
195
+
196
+ NVPA_Status NVPW_CounterDataCombiner_CreateRange(NVPW_CounterDataCombiner_CreateRange_Params* pParams);
197
+
198
+ typedef struct NVPW_CounterDataCombiner_CopyIntoRange_Params // Parameter block for NVPW_CounterDataCombiner_CopyIntoRange().
199
+ {
200
+ /// [in] presumably set to NVPW_CounterDataCombiner_CopyIntoRange_Params_STRUCT_SIZE (defined below)
201
+ size_t structSize;
202
+ /// [in] assign to NULL
203
+ void* pPriv;
204
+ /// [in]
205
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
206
+ /// [in] presumably a range index in the combiner's destination counter data - confirm
207
+ size_t rangeIndexDst;
208
+ /// [in] source counter data; must have counters identical to the destination (see the note on the function below)
209
+ const uint8_t* pCounterDataSrc;
210
+ /// [in] presumably a range index within 'pCounterDataSrc' - confirm
211
+ size_t rangeIndexSrc;
212
+ } NVPW_CounterDataCombiner_CopyIntoRange_Params;
213
+ #define NVPW_CounterDataCombiner_CopyIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_CopyIntoRange_Params, rangeIndexSrc)
214
+
215
+ /// In order to use this API, the source counter data and the destination counter data must have identical counters
216
+ NVPA_Status NVPW_CounterDataCombiner_CopyIntoRange(NVPW_CounterDataCombiner_CopyIntoRange_Params* pParams);
217
+
218
+ typedef struct NVPW_CounterDataCombiner_AccumulateIntoRange_Params // Parameter block for NVPW_CounterDataCombiner_AccumulateIntoRange().
219
+ {
220
+ /// [in] presumably set to NVPW_CounterDataCombiner_AccumulateIntoRange_Params_STRUCT_SIZE (defined below)
221
+ size_t structSize;
222
+ /// [in] assign to NULL
223
+ void* pPriv;
224
+ NVPA_CounterDataCombiner* pCounterDataCombiner; // NOTE(review): no direction tag on this or the following fields; presumably all [in] - confirm
225
+ size_t rangeIndexDst;
226
+ uint32_t dstMultiplier; // integer multiplier here; NVPW_CounterDataCombiner_WeightedSumIntoRange_Params declares its multipliers as double
227
+ const uint8_t* pCounterDataSrc;
228
+ size_t rangeIndexSrc;
229
+ uint32_t srcMultiplier;
230
+ } NVPW_CounterDataCombiner_AccumulateIntoRange_Params;
231
+ #define NVPW_CounterDataCombiner_AccumulateIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_AccumulateIntoRange_Params, srcMultiplier)
232
+
233
+ NVPA_Status NVPW_CounterDataCombiner_AccumulateIntoRange(NVPW_CounterDataCombiner_AccumulateIntoRange_Params* pParams);
234
+
235
+ typedef struct NVPW_CounterDataCombiner_SumIntoRange_Params // Parameter block for NVPW_CounterDataCombiner_SumIntoRange().
236
+ {
237
+ /// [in] presumably set to NVPW_CounterDataCombiner_SumIntoRange_Params_STRUCT_SIZE (defined below)
238
+ size_t structSize;
239
+ /// [in] assign to NULL
240
+ void* pPriv;
241
+ NVPA_CounterDataCombiner* pCounterDataCombiner; // NOTE(review): no direction tag on this or the following fields; presumably all [in] - confirm
242
+ size_t rangeIndexDst;
243
+ const uint8_t* pCounterDataSrc;
244
+ size_t rangeIndexSrc;
245
+ } NVPW_CounterDataCombiner_SumIntoRange_Params;
246
+ #define NVPW_CounterDataCombiner_SumIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_SumIntoRange_Params, rangeIndexSrc)
247
+
248
+ NVPA_Status NVPW_CounterDataCombiner_SumIntoRange(NVPW_CounterDataCombiner_SumIntoRange_Params* pParams);
249
+
250
+ typedef struct NVPW_CounterDataCombiner_WeightedSumIntoRange_Params // Parameter block for NVPW_CounterDataCombiner_WeightedSumIntoRange().
251
+ {
252
+ /// [in] presumably set to NVPW_CounterDataCombiner_WeightedSumIntoRange_Params_STRUCT_SIZE (defined below)
253
+ size_t structSize;
254
+ /// [in] assign to NULL
255
+ void* pPriv;
256
+ NVPA_CounterDataCombiner* pCounterDataCombiner; // NOTE(review): no direction tag on this or the following fields; presumably all [in] - confirm
257
+ size_t rangeIndexDst;
258
+ double dstMultiplier; // floating-point multiplier; NVPW_CounterDataCombiner_AccumulateIntoRange_Params declares its multipliers as uint32_t
259
+ const uint8_t* pCounterDataSrc;
260
+ size_t rangeIndexSrc;
261
+ double srcMultiplier;
262
+ } NVPW_CounterDataCombiner_WeightedSumIntoRange_Params;
263
+ #define NVPW_CounterDataCombiner_WeightedSumIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_WeightedSumIntoRange_Params, srcMultiplier)
264
+
265
+ NVPA_Status NVPW_CounterDataCombiner_WeightedSumIntoRange(NVPW_CounterDataCombiner_WeightedSumIntoRange_Params* pParams);
266
+
267
+ /**
268
+ * @}
269
+ ******************************************************************************/
270
+
271
+ /***************************************************************************//**
272
+ * @name Metrics Configuration
273
+ * @{
274
+ */
275
+
276
+ typedef struct NVPA_RawMetricsConfig NVPA_RawMetricsConfig; // Forward declaration only; no struct body appears in the visible part of this header.
277
+
278
+ typedef struct NVPA_RawMetricRequest // One raw metric request; passed as an array through the 'pRawMetricRequests' fields of the AddMetrics-style parameter blocks in this header.
279
+ {
280
+ /// [in] presumably set to NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE (defined below)
281
+ size_t structSize;
282
+ /// [in] assign to NULL
283
+ void* pPriv;
284
+ /// [in]
285
+ const char* pMetricName;
286
+ /// [in]
287
+ NVPA_Bool isolated;
288
+ /// [in] ignored by AddMetric but observed by CounterData initialization
289
+ NVPA_Bool keepInstances;
290
+ } NVPA_RawMetricRequest;
291
+ #define NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPA_RawMetricRequest, keepInstances)
292
+
293
+ typedef struct NVPW_GetSupportedChipNames_Params // Parameter block for NVPW_GetSupportedChipNames().
294
+ {
295
+ /// [in] presumably set to NVPW_GetSupportedChipNames_Params_STRUCT_SIZE (defined below)
296
+ size_t structSize;
297
+ /// [in] assign to NULL
298
+ void* pPriv;
299
+ /// [out] array of chip-name strings; read-only for the caller per its const qualifiers
300
+ const char* const* ppChipNames;
301
+ /// [out] presumably the number of entries in 'ppChipNames' - confirm
302
+ size_t numChipNames;
303
+ } NVPW_GetSupportedChipNames_Params;
304
+ #define NVPW_GetSupportedChipNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_GetSupportedChipNames_Params, numChipNames)
305
+
306
+ NVPA_Status NVPW_GetSupportedChipNames(NVPW_GetSupportedChipNames_Params* pParams);
307
+
308
+ typedef struct NVPW_RawMetricsConfig_Destroy_Params // Parameter block for NVPW_RawMetricsConfig_Destroy().
309
+ {
310
+ /// [in] presumably set to NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE (defined below)
311
+ size_t structSize;
312
+ /// [in] assign to NULL
313
+ void* pPriv;
314
+ NVPA_RawMetricsConfig* pRawMetricsConfig; // NOTE(review): no direction tag; presumably [in], the config to destroy - confirm
315
+ } NVPW_RawMetricsConfig_Destroy_Params;
316
+ #define NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_Destroy_Params, pRawMetricsConfig)
317
+
318
+ NVPA_Status NVPW_RawMetricsConfig_Destroy(NVPW_RawMetricsConfig_Destroy_Params* pParams);
319
+
320
+ typedef struct NVPW_RawMetricsConfig_SetCounterAvailability_Params // Parameter block for NVPW_RawMetricsConfig_SetCounterAvailability().
321
+ {
322
+ /// [in] presumably set to NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE (defined below)
323
+ size_t structSize;
324
+ /// [in] assign to NULL
325
+ void* pPriv;
326
+ NVPA_RawMetricsConfig* pRawMetricsConfig; // NOTE(review): no direction tag; presumably [in] - confirm
327
+ /// [in] buffer with counter availability image
328
+ const uint8_t* pCounterAvailabilityImage;
329
+ } NVPW_RawMetricsConfig_SetCounterAvailability_Params;
330
+ #define NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_SetCounterAvailability_Params, pCounterAvailabilityImage)
331
+
332
+ NVPA_Status NVPW_RawMetricsConfig_SetCounterAvailability(NVPW_RawMetricsConfig_SetCounterAvailability_Params* pParams);
333
+
334
+ typedef struct NVPW_RawMetricsConfig_BeginPassGroup_Params // Parameter block for NVPW_RawMetricsConfig_BeginPassGroup().
335
+ {
336
+ /// [in] presumably set to NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE (defined below)
337
+ size_t structSize;
338
+ /// [in] assign to NULL
339
+ void* pPriv;
340
+ NVPA_RawMetricsConfig* pRawMetricsConfig; // NOTE(review): no direction tag; presumably [in] - confirm
341
+ size_t maxPassCount; // NOTE(review): undocumented; presumably [in], an upper bound on passes for the group being opened - confirm
342
+ } NVPW_RawMetricsConfig_BeginPassGroup_Params;
343
+ #define NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_BeginPassGroup_Params, maxPassCount)
344
+
345
+ NVPA_Status NVPW_RawMetricsConfig_BeginPassGroup(NVPW_RawMetricsConfig_BeginPassGroup_Params* pParams);
346
+
347
+ typedef struct NVPW_RawMetricsConfig_EndPassGroup_Params // Parameter block for NVPW_RawMetricsConfig_EndPassGroup().
348
+ {
349
+ /// [in] presumably set to NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE (defined below)
350
+ size_t structSize;
351
+ /// [in] assign to NULL
352
+ void* pPriv;
353
+ NVPA_RawMetricsConfig* pRawMetricsConfig; // NOTE(review): no direction tag; presumably [in] - confirm
354
+ } NVPW_RawMetricsConfig_EndPassGroup_Params;
355
+ #define NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_EndPassGroup_Params, pRawMetricsConfig)
356
+
357
+ NVPA_Status NVPW_RawMetricsConfig_EndPassGroup(NVPW_RawMetricsConfig_EndPassGroup_Params* pParams);
358
+
359
+ typedef struct NVPW_RawMetricsConfig_GetNumMetrics_Params // Parameter block for NVPW_RawMetricsConfig_GetNumMetrics().
360
+ {
361
+ /// [in] presumably set to NVPW_RawMetricsConfig_GetNumMetrics_Params_STRUCT_SIZE (defined below)
362
+ size_t structSize;
363
+ /// [in] assign to NULL
364
+ void* pPriv;
365
+ const NVPA_RawMetricsConfig* pRawMetricsConfig; // no direction tag in the original; pointer-to-const, so the config is only read through this field
366
+ /// [out]
367
+ size_t numMetrics;
368
+ } NVPW_RawMetricsConfig_GetNumMetrics_Params;
369
+ #define NVPW_RawMetricsConfig_GetNumMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumMetrics_Params, numMetrics)
370
+
371
+ NVPA_Status NVPW_RawMetricsConfig_GetNumMetrics(NVPW_RawMetricsConfig_GetNumMetrics_Params* pParams);
372
+
373
+ typedef struct NVPW_RawMetricsConfig_GetMetricProperties_Params // Parameter block for NVPW_RawMetricsConfig_GetMetricProperties().
374
+ {
375
+ /// [in] presumably set to NVPW_RawMetricsConfig_GetMetricProperties_Params_STRUCT_SIZE (defined below)
376
+ size_t structSize;
377
+ /// [in] assign to NULL
378
+ void* pPriv;
379
+ const NVPA_RawMetricsConfig* pRawMetricsConfig; // no direction tag in the original; pointer-to-const, so the config is only read through this field
380
+ size_t metricIndex; // NOTE(review): undocumented; presumably [in], bounded by the count from NVPW_RawMetricsConfig_GetNumMetrics() - confirm
381
+ /// [out]
382
+ const char* pMetricName;
383
+ /// [out]
384
+ NVPA_Bool supportsPipelined;
385
+ /// [out]
386
+ NVPA_Bool supportsIsolated;
387
+ } NVPW_RawMetricsConfig_GetMetricProperties_Params;
388
+ #define NVPW_RawMetricsConfig_GetMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetMetricProperties_Params, supportsIsolated)
389
+
390
+ NVPA_Status NVPW_RawMetricsConfig_GetMetricProperties(NVPW_RawMetricsConfig_GetMetricProperties_Params* pParams);
391
+
392
+ typedef struct NVPW_RawMetricsConfig_GetMetricProperties_V2_Params // Parameter block for NVPW_RawMetricsConfig_GetMetricProperties_V2(); unlike the non-V2 struct it has no supportsPipelined/supportsIsolated outputs.
393
+ {
394
+ /// [in] presumably set to NVPW_RawMetricsConfig_GetMetricProperties_V2_Params_STRUCT_SIZE (defined below)
395
+ size_t structSize;
396
+ /// [in] assign to NULL
397
+ void* pPriv;
398
+ const NVPA_RawMetricsConfig* pRawMetricsConfig; // no direction tag in the original; pointer-to-const, so the config is only read through this field
399
+ size_t metricIndex; // NOTE(review): undocumented; presumably [in] - confirm
400
+ /// [out]
401
+ const char* pMetricName;
402
+ } NVPW_RawMetricsConfig_GetMetricProperties_V2_Params;
403
+ #define NVPW_RawMetricsConfig_GetMetricProperties_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetMetricProperties_V2_Params, pMetricName)
404
+
405
+ NVPA_Status NVPW_RawMetricsConfig_GetMetricProperties_V2(NVPW_RawMetricsConfig_GetMetricProperties_V2_Params* pParams);
406
+
407
+ typedef struct NVPW_RawMetricsConfig_AddMetrics_Params // Parameter block for NVPW_RawMetricsConfig_AddMetrics().
408
+ {
409
+ /// [in] presumably set to NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE (defined below)
410
+ size_t structSize;
411
+ /// [in] assign to NULL
412
+ void* pPriv;
413
+ NVPA_RawMetricsConfig* pRawMetricsConfig; // NOTE(review): no direction tag; presumably [in] - confirm
414
+ const NVPA_RawMetricRequest* pRawMetricRequests; // no direction tag in the original; pointer-to-const array of requests
415
+ size_t numMetricRequests; // NOTE(review): undocumented; presumably the element count of 'pRawMetricRequests' - confirm
416
+ } NVPW_RawMetricsConfig_AddMetrics_Params;
417
+ #define NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_AddMetrics_Params, numMetricRequests)
418
+
419
+ NVPA_Status NVPW_RawMetricsConfig_AddMetrics(NVPW_RawMetricsConfig_AddMetrics_Params* pParams);
420
+
421
+ typedef struct NVPW_RawMetricsConfig_IsAddMetricsPossible_Params // Parameter block for NVPW_RawMetricsConfig_IsAddMetricsPossible().
422
+ {
423
+ /// [in] presumably set to NVPW_RawMetricsConfig_IsAddMetricsPossible_Params_STRUCT_SIZE (defined below)
424
+ size_t structSize;
425
+ /// [in] assign to NULL
426
+ void* pPriv;
427
+ const NVPA_RawMetricsConfig* pRawMetricsConfig; // no direction tag in the original; pointer-to-const, so the config is only read through this field
428
+ const NVPA_RawMetricRequest* pRawMetricRequests; // no direction tag in the original; pointer-to-const array of requests
429
+ size_t numMetricRequests; // NOTE(review): undocumented; presumably the element count of 'pRawMetricRequests' - confirm
430
+ /// [out]
431
+ NVPA_Bool isPossible;
432
+ } NVPW_RawMetricsConfig_IsAddMetricsPossible_Params;
433
+ #define NVPW_RawMetricsConfig_IsAddMetricsPossible_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params, isPossible)
434
+
435
+ NVPA_Status NVPW_RawMetricsConfig_IsAddMetricsPossible(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params* pParams);
436
+
437
+ typedef struct NVPW_RawMetricsConfig_GenerateConfigImage_Params // Parameter block for NVPW_RawMetricsConfig_GenerateConfigImage().
438
+ {
439
+ /// [in] presumably set to NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE (defined below)
440
+ size_t structSize;
441
+ /// [in] assign to NULL
442
+ void* pPriv;
443
+ NVPA_RawMetricsConfig* pRawMetricsConfig; // non-const: per the 'mergeAllPassGroups' note, a successful merge persists its effects in this config
444
+ /// [in] If true, all existing pass groups may be merged to reduce number of passes.
445
+ /// If merge was successful, distribution of counters in passes may be updated as a side-effect. The effects
446
+ /// will be persistent in pRawMetricsConfig.
447
+ NVPA_Bool mergeAllPassGroups;
448
+ } NVPW_RawMetricsConfig_GenerateConfigImage_Params;
449
+ #define NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GenerateConfigImage_Params, mergeAllPassGroups)
450
+
451
+ /// This API may fail if called inside a pass group with `mergeAllPassGroups` = true.
452
+ NVPA_Status NVPW_RawMetricsConfig_GenerateConfigImage(NVPW_RawMetricsConfig_GenerateConfigImage_Params* pParams);
453
+
454
+ typedef struct NVPW_RawMetricsConfig_GetConfigImage_Params // Parameter block for NVPW_RawMetricsConfig_GetConfigImage().
455
+ {
456
+ /// [in] presumably set to NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE (defined below)
457
+ size_t structSize;
458
+ /// [in] assign to NULL
459
+ void* pPriv;
460
+ const NVPA_RawMetricsConfig* pRawMetricsConfig; // no direction tag in the original; pointer-to-const, so the config is only read through this field
461
+ /// [in] Number of bytes allocated for pBuffer
462
+ size_t bytesAllocated;
463
+ /// [out] [optional] Buffer receiving the config image
464
+ uint8_t* pBuffer;
465
+ /// [out] Count of bytes that would be copied into pBuffer
466
+ size_t bytesCopied;
467
+ } NVPW_RawMetricsConfig_GetConfigImage_Params;
468
+ #define NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetConfigImage_Params, bytesCopied)
469
+
470
+ NVPA_Status NVPW_RawMetricsConfig_GetConfigImage(NVPW_RawMetricsConfig_GetConfigImage_Params* pParams);
471
+
472
+ typedef struct NVPW_RawMetricsConfig_GetNumPasses_Params // Parameter block for NVPW_RawMetricsConfig_GetNumPasses().
473
+ {
474
+ /// [in] presumably set to NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE (defined below)
475
+ size_t structSize;
476
+ /// [in] assign to NULL
477
+ void* pPriv;
478
+ const NVPA_RawMetricsConfig* pRawMetricsConfig; // no direction tag in the original; pointer-to-const, so the config is only read through this field
479
+ /// [out] pipelined-pass term of the total given on the function below
480
+ size_t numPipelinedPasses;
481
+ /// [out] isolated-pass term of the total given on the function below (multiplied by numNestingLevels there)
482
+ size_t numIsolatedPasses;
483
+ } NVPW_RawMetricsConfig_GetNumPasses_Params;
484
+ #define NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumPasses_Params, numIsolatedPasses)
485
+
486
+ /// Total num passes = numPipelinedPasses + numIsolatedPasses * numNestingLevels
487
+ NVPA_Status NVPW_RawMetricsConfig_GetNumPasses(NVPW_RawMetricsConfig_GetNumPasses_Params* pParams);
488
+
489
+ typedef struct NVPW_RawMetricsConfig_GetNumPasses_V2_Params // Parameter block for NVPW_RawMetricsConfig_GetNumPasses_V2(); reports a single pass count instead of separate pipelined/isolated counts.
490
+ {
491
+ /// [in] presumably set to NVPW_RawMetricsConfig_GetNumPasses_V2_Params_STRUCT_SIZE (defined below)
492
+ size_t structSize;
493
+ /// [in] assign to NULL
494
+ void* pPriv;
495
+ /// [in]
496
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
497
+ /// [out] per-nesting-level pass count; see the total given on the function below
498
+ size_t numPasses;
499
+ } NVPW_RawMetricsConfig_GetNumPasses_V2_Params;
500
+ #define NVPW_RawMetricsConfig_GetNumPasses_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumPasses_V2_Params, numPasses)
501
+
502
+ /// Total num passes = numPasses * numNestingLevels
503
+ NVPA_Status NVPW_RawMetricsConfig_GetNumPasses_V2(NVPW_RawMetricsConfig_GetNumPasses_V2_Params* pParams);
504
+
505
+ typedef struct NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize_Params // Parameter block for NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize().
506
+ {
507
+ /// [in] presumably set to NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize_Params_STRUCT_SIZE (defined below)
508
+ size_t structSize;
509
+ /// [in] assign to NULL
510
+ void* pPriv;
511
+ /// [in] Typically created by e.g. NVPW_RawMetricsConfig_GetConfigImage(), must be align(8).
512
+ const uint8_t* pConfig;
513
+ /// [in] presumably the byte size of 'pConfig' - confirm
514
+ size_t configSize;
515
+ /// [out] the estimated per-sample records size (see the note on the function below)
516
+ size_t sampleSize;
517
+ } NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize_Params;
518
+ #define NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize_Params, sampleSize)
519
+
520
+ /// Estimate per sample records size based on a virtual device
521
+ NVPA_Status NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize(NVPW_PeriodicSampler_Config_GetSocEstimatedSampleSize_Params* pParams);
522
+
523
+ typedef struct NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize_Params // Parameter block for NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize().
524
+ {
525
+ /// [in] presumably set to NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize_Params_STRUCT_SIZE (defined below)
526
+ size_t structSize;
527
+ /// [in] assign to NULL
528
+ void* pPriv;
529
+ /// [in] Typically created by e.g. NVPW_RawMetricsConfig_GetConfigImage(), must be align(8).
530
+ const uint8_t* pConfig;
531
+ /// [in] presumably the byte size of 'pConfig' - confirm
532
+ size_t configSize;
533
+ /// [out] the estimated per-sample records size (see the note on the function below)
534
+ size_t sampleSize;
535
+ } NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize_Params;
536
+ #define NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize_Params, sampleSize)
537
+
538
+ /// Estimate per sample records size based on a virtual device
539
+ NVPA_Status NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize(NVPW_PeriodicSampler_Config_GetGpuEstimatedSampleSize_Params* pParams);
540
+
541
+ /**
542
+ * @}
543
+ ******************************************************************************/
544
+
545
+ typedef struct NVPW_Config_GetRawCounterInfo_Params // Parameter block for NVPW_Config_GetRawCounterInfo(); the 'numPassIndices' note describes a count-query mode (NULL array) and a fill mode.
546
+ {
547
+ /// [in] presumably set to NVPW_Config_GetRawCounterInfo_Params_STRUCT_SIZE (defined below)
548
+ size_t structSize;
549
+ /// [in] assign to NULL
550
+ void* pPriv;
551
+ /// [in] NOTE(review): presumably a config image as produced by NVPW_RawMetricsConfig_GetConfigImage() - confirm
552
+ const uint8_t* pConfig;
553
+ /// [in] presumably the byte size of 'pConfig' - confirm
554
+ size_t configSize;
555
+ /// [in] name of the raw counter whose pass indices are requested
556
+ const char* pRawCounterName;
557
+ /// [inout] array containing indices of passes the counter resides in. 'pPassIndices' is in, '*pPassIndices' is
558
+ /// out.
559
+ size_t* pPassIndices;
560
+ /// [inout] if 'pPassIndices' is NULL, the count of passes this counter resides in will be returned; otherwise
561
+ /// it should be set to the capacity of 'pPassIndices' array, and on return, it will be overwritten to reflect
562
+ /// the actual count filled into 'pPassIndices'
563
+ size_t numPassIndices;
564
+ } NVPW_Config_GetRawCounterInfo_Params;
565
+ #define NVPW_Config_GetRawCounterInfo_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Config_GetRawCounterInfo_Params, numPassIndices)
566
+
567
+ NVPA_Status NVPW_Config_GetRawCounterInfo(NVPW_Config_GetRawCounterInfo_Params* pParams);
568
+
569
+ typedef struct NVPW_Config_GetRawCounters_Params // Parameter block for NVPW_Config_GetRawCounters(); the 'numRawCounters' note describes a count-query mode (NULL array) and a fill mode.
570
+ {
571
+ /// [in] presumably set to NVPW_Config_GetRawCounters_Params_STRUCT_SIZE (defined below)
572
+ size_t structSize;
573
+ /// [in] assign to NULL
574
+ void* pPriv;
575
+ /// [in] NOTE(review): presumably a config image as produced by NVPW_RawMetricsConfig_GetConfigImage() - confirm
576
+ const uint8_t* pConfig;
577
+ /// [in] presumably the byte size of 'pConfig' - confirm
578
+ size_t configSize;
579
+ /// [in] index of the pass whose raw counters are requested
580
+ size_t passIndex;
581
+ /// [inout] array containing raw counter names. 'ppRawCounterNames' is in, '*ppRawCounterNames' is out.
582
+ const char** ppRawCounterNames;
583
+ /// [inout] if 'ppRawCounterNames' is NULL, the count of raw counters will be returned; otherwise it should be
584
+ /// set to the capacity of 'ppRawCounterNames' array, and on return, it will be overwritten to reflect the
585
+ /// actual count filled into 'ppRawCounterNames'
586
+ size_t numRawCounters;
587
+ } NVPW_Config_GetRawCounters_Params;
588
+ #define NVPW_Config_GetRawCounters_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Config_GetRawCounters_Params, numRawCounters)
589
+
590
+ NVPA_Status NVPW_Config_GetRawCounters(NVPW_Config_GetRawCounters_Params* pParams);
591
+
592
+ /***************************************************************************//**
593
+ * @name CounterData Creation
594
+ * @{
595
+ */
596
+
597
+ typedef struct NVPA_CounterDataBuilder NVPA_CounterDataBuilder; // Forward declaration only; no struct body appears in the visible part of this header.
598
+
599
+ typedef struct NVPW_CounterDataBuilder_Create_Params // Parameter block for NVPW_CounterDataBuilder_Create(); note the [out] field precedes 'pChipName'.
600
+ {
601
+ /// [in] presumably set to NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE (defined below)
602
+ size_t structSize;
603
+ /// [in] assign to NULL
604
+ void* pPriv;
605
+ /// [out] the created counter data builder
606
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
607
+ const char* pChipName; // NOTE(review): no direction tag; presumably [in] (the '[out]' above is placed on 'pCounterDataBuilder' only) - confirm
608
+ } NVPW_CounterDataBuilder_Create_Params;
609
+ #define NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_Create_Params, pChipName)
610
+
611
+ NVPA_Status NVPW_CounterDataBuilder_Create(NVPW_CounterDataBuilder_Create_Params* pParams);
612
+
613
+ typedef struct NVPW_CounterDataBuilder_Destroy_Params // Parameter block for NVPW_CounterDataBuilder_Destroy().
614
+ {
615
+ /// [in] presumably set to NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE (defined below)
616
+ size_t structSize;
617
+ /// [in] assign to NULL
618
+ void* pPriv;
619
+ NVPA_CounterDataBuilder* pCounterDataBuilder; // NOTE(review): no direction tag; presumably [in], the builder to destroy - confirm
620
+ } NVPW_CounterDataBuilder_Destroy_Params;
621
+ #define NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_Destroy_Params, pCounterDataBuilder)
622
+
623
+ NVPA_Status NVPW_CounterDataBuilder_Destroy(NVPW_CounterDataBuilder_Destroy_Params* pParams);
624
+
625
+ typedef struct NVPW_CounterDataBuilder_AddMetrics_Params // Parameter block for NVPW_CounterDataBuilder_AddMetrics().
626
+ {
627
+ /// [in] presumably set to NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE (defined below)
628
+ size_t structSize;
629
+ /// [in] assign to NULL
630
+ void* pPriv;
631
+ NVPA_CounterDataBuilder* pCounterDataBuilder; // NOTE(review): no direction tag; presumably [in] - confirm
632
+ const NVPA_RawMetricRequest* pRawMetricRequests; // no direction tag in the original; pointer-to-const array of requests
633
+ size_t numMetricRequests; // NOTE(review): undocumented; presumably the element count of 'pRawMetricRequests' - confirm
634
+ } NVPW_CounterDataBuilder_AddMetrics_Params;
635
+ #define NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_AddMetrics_Params, numMetricRequests)
636
+
637
+ NVPA_Status NVPW_CounterDataBuilder_AddMetrics(NVPW_CounterDataBuilder_AddMetrics_Params* pParams);
638
+
639
+ typedef struct NVPW_CounterDataBuilder_GetCounterDataPrefix_Params // Parameter block for NVPW_CounterDataBuilder_GetCounterDataPrefix().
640
+ {
641
+ /// [in] presumably set to NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE (defined below)
642
+ size_t structSize;
643
+ /// [in] assign to NULL
644
+ void* pPriv;
645
+ NVPA_CounterDataBuilder* pCounterDataBuilder; // NOTE(review): no direction tag; presumably [in] - confirm
646
+ /// [in] Number of bytes allocated for pBuffer
647
+ size_t bytesAllocated;
648
+ /// [out] [optional] Buffer receiving the counter data prefix
649
+ uint8_t* pBuffer;
650
+ /// [out] Count of bytes that would be copied to pBuffer
651
+ size_t bytesCopied;
652
+ } NVPW_CounterDataBuilder_GetCounterDataPrefix_Params;
653
+ #define NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params, bytesCopied)
654
+
655
+ NVPA_Status NVPW_CounterDataBuilder_GetCounterDataPrefix(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params* pParams);
656
+
657
+ /**
658
+ * @}
659
+ ******************************************************************************/
660
+
661
+ /***************************************************************************//**
662
+ * @name Metrics Evaluator
663
+ * @{
664
+ */
665
+
666
+ typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator; // Forward declaration only; no struct body appears in the visible part of this header.
667
+
668
+ #ifndef NVPW_DIM_UNIT_DEFINED // guard: the enum below is emitted only if NVPW_DIM_UNIT_DEFINED is not already defined
669
+ #define NVPW_DIM_UNIT_DEFINED
670
+ typedef enum NVPW_DimUnitName // NOTE(review): several enumerators (e.g. NVPW_DIM_UNIT_INVALID = 3518299157) exceed INT_MAX; ISO C before C23 requires enumerator values representable as int, so this relies on compiler extensions or a C++ compiler
671
+ {
672
+ NVPW_DIM_UNIT_INVALID = 3518299157,
673
+ NVPW_DIM_UNIT_UNITLESS = 2126137902,
674
+ NVPW_DIM_UNIT_ATTRIBUTES = 3776338729,
675
+ NVPW_DIM_UNIT_BYTES = 3797850191,
676
+ NVPW_DIM_UNIT_CTAS = 1960564139,
677
+ NVPW_DIM_UNIT_CTC_CYCLES = 2224883873,
678
+ NVPW_DIM_UNIT_DRAM_CYCLES = 2650981327,
679
+ NVPW_DIM_UNIT_FBP_CYCLES = 1785238957,
680
+ NVPW_DIM_UNIT_FE_OPS = 2919159083,
681
+ NVPW_DIM_UNIT_GPC_CYCLES = 1222631184,
682
+ NVPW_DIM_UNIT_IDC_REQUESTS = 2012649669,
683
+ NVPW_DIM_UNIT_INSTRUCTIONS = 1418625543,
684
+ NVPW_DIM_UNIT_KILOBYTES = 1335980302,
685
+ NVPW_DIM_UNIT_L1DATA_BANK_ACCESSES = 1479493682,
686
+ NVPW_DIM_UNIT_L1DATA_BANK_CONFLICTS = 3433170787,
687
+ NVPW_DIM_UNIT_L1TEX_REQUESTS = 1306473767,
688
+ NVPW_DIM_UNIT_L1TEX_TAGS = 26573010,
689
+ NVPW_DIM_UNIT_L1TEX_WAVEFRONTS = 129373765,
690
+ NVPW_DIM_UNIT_L2_REQUESTS = 1143695106,
691
+ NVPW_DIM_UNIT_L2_SECTORS = 3424101564,
692
+ NVPW_DIM_UNIT_L2_TAGS = 3755612781,
693
+ NVPW_DIM_UNIT_LRC_REQUESTS = 2280914327,
694
+ NVPW_DIM_UNIT_LRC_SECTORS = 7212034,
695
+ NVPW_DIM_UNIT_MCC_CYCLES = 1826685787,
696
+ NVPW_DIM_UNIT_NANOSECONDS = 3047500672,
697
+ NVPW_DIM_UNIT_NVDLA_CYCLES = 3374059789,
698
+ NVPW_DIM_UNIT_NVENC_CYCLES = 2267185244,
699
+ NVPW_DIM_UNIT_NVLRX_CYCLES = 4059934930,
700
+ NVPW_DIM_UNIT_NVLTX_CYCLES = 1814350488,
701
+ NVPW_DIM_UNIT_OFA_CYCLES = 4290210307,
702
+ NVPW_DIM_UNIT_PCIE_CYCLES = 1230450943,
703
+ NVPW_DIM_UNIT_PERCENT = 1284354694,
704
+ NVPW_DIM_UNIT_PIXELS = 4227616663,
705
+ NVPW_DIM_UNIT_PIXEL_SHADER_BARRIERS = 3705502518,
706
+ NVPW_DIM_UNIT_PRIMITIVES = 2373084002,
707
+ NVPW_DIM_UNIT_PVAVPU_CYCLES = 2238259366,
708
+ NVPW_DIM_UNIT_PVA_CYCLES = 202044173,
709
+ NVPW_DIM_UNIT_QUADS = 1539753497,
710
+ NVPW_DIM_UNIT_REGISTERS = 2837260947,
711
+ NVPW_DIM_UNIT_SAMPLES = 746046551,
712
+ NVPW_DIM_UNIT_SECONDS = 1164825258,
713
+ NVPW_DIM_UNIT_SYSL2_REQUESTS = 2165109286,
714
+ NVPW_DIM_UNIT_SYSL2_SECTORS = 2268734175,
715
+ NVPW_DIM_UNIT_SYSL2_TAGS = 3308651352,
716
+ NVPW_DIM_UNIT_SYSLRC_REQUESTS = 3328245480,
717
+ NVPW_DIM_UNIT_SYSLRC_SECTORS = 1190477493,
718
+ NVPW_DIM_UNIT_SYS_CYCLES = 3310821688,
719
+ NVPW_DIM_UNIT_TEXELS = 1293214069,
720
+ NVPW_DIM_UNIT_THREADS = 164261907,
721
+ NVPW_DIM_UNIT_TMEM_ACCESSES = 3742902067,
722
+ NVPW_DIM_UNIT_VERTICES = 1873662209,
723
+ NVPW_DIM_UNIT_VIC_CYCLES = 103143588,
724
+ NVPW_DIM_UNIT_WARPS = 97951949,
725
+ NVPW_DIM_UNIT_WORKIDS = 1971113483,
726
+ NVPW_DIM_UNIT_WORKLOADS = 1728142656
727
+ } NVPW_DimUnitName;
728
+ #endif //NVPW_DIM_UNIT_DEFINED
729
+
730
+ #ifndef NVPW_HW_UNIT_DEFINED
731
+ #define NVPW_HW_UNIT_DEFINED
732
+ typedef enum NVPW_HwUnit
733
+ {
734
+ NVPW_HW_UNIT_INVALID = 3498035701,
735
+ NVPW_HW_UNIT_CROP = 2872137846,
736
+ NVPW_HW_UNIT_CTC = 4123164475,
737
+ NVPW_HW_UNIT_DRAM = 1662616918,
738
+ NVPW_HW_UNIT_DRAMC = 1401232876,
739
+ NVPW_HW_UNIT_FBP = 2947194306,
740
+ NVPW_HW_UNIT_FBPA = 690045803,
741
+ NVPW_HW_UNIT_FE = 2204924321,
742
+ NVPW_HW_UNIT_GPC = 1911735839,
743
+ NVPW_HW_UNIT_GPU = 1014363534,
744
+ NVPW_HW_UNIT_GR = 2933618517,
745
+ NVPW_HW_UNIT_IDC = 842765289,
746
+ NVPW_HW_UNIT_L1TEX = 893940957,
747
+ NVPW_HW_UNIT_LRC = 4004756136,
748
+ NVPW_HW_UNIT_LTS = 2333266697,
749
+ NVPW_HW_UNIT_MCC = 3980130194,
750
+ NVPW_HW_UNIT_NVDLA = 4201167892,
751
+ NVPW_HW_UNIT_NVENC = 207708260,
752
+ NVPW_HW_UNIT_NVLRX = 3091684901,
753
+ NVPW_HW_UNIT_NVLTX = 869679659,
754
+ NVPW_HW_UNIT_OFA = 70307371,
755
+ NVPW_HW_UNIT_PCIE = 3433264174,
756
+ NVPW_HW_UNIT_PDA = 345193251,
757
+ NVPW_HW_UNIT_PES = 804128425,
758
+ NVPW_HW_UNIT_PROP = 3339255507,
759
+ NVPW_HW_UNIT_PVA = 2565499490,
760
+ NVPW_HW_UNIT_PVAVPU = 1656645655,
761
+ NVPW_HW_UNIT_RASTER = 187932504,
762
+ NVPW_HW_UNIT_SM = 724224710,
763
+ NVPW_HW_UNIT_SMSP = 2837616917,
764
+ NVPW_HW_UNIT_SYS = 768990063,
765
+ NVPW_HW_UNIT_SYSLRC = 3247626950,
766
+ NVPW_HW_UNIT_SYSLTS = 4137740217,
767
+ NVPW_HW_UNIT_TPC = 1889024613,
768
+ NVPW_HW_UNIT_VAF = 753670509,
769
+ NVPW_HW_UNIT_VIC = 322439594,
770
+ NVPW_HW_UNIT_VPC = 275561583,
771
+ NVPW_HW_UNIT_ZCULL = 2401248356,
772
+ NVPW_HW_UNIT_ZROP = 979500456
773
+ } NVPW_HwUnit;
774
+ #endif //NVPW_HW_UNIT_DEFINED
775
+
776
+ typedef enum NVPW_RollupOp
777
+ {
778
+ NVPW_ROLLUP_OP_AVG = 0,
779
+ NVPW_ROLLUP_OP_MAX,
780
+ NVPW_ROLLUP_OP_MIN,
781
+ NVPW_ROLLUP_OP_SUM,
782
+ NVPW_ROLLUP_OP__COUNT
783
+ } NVPW_RollupOp;
784
+
785
+ typedef enum NVPW_MetricType
786
+ {
787
+ NVPW_METRIC_TYPE_COUNTER = 0,
788
+ NVPW_METRIC_TYPE_RATIO,
789
+ NVPW_METRIC_TYPE_THROUGHPUT,
790
+ NVPW_METRIC_TYPE__COUNT
791
+ } NVPW_MetricType;
792
+
793
+ typedef enum NVPW_Submetric
794
+ {
795
+ NVPW_SUBMETRIC_NONE = 0,
796
+ NVPW_SUBMETRIC_PEAK_SUSTAINED = 1,
797
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE = 2,
798
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND = 3,
799
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED = 4,
800
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND = 5,
801
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME = 6,
802
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND = 7,
803
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION = 8,
804
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND = 9,
805
+ NVPW_SUBMETRIC_PER_CYCLE_ACTIVE = 10,
806
+ NVPW_SUBMETRIC_PER_CYCLE_ELAPSED = 11,
807
+ NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME = 12,
808
+ NVPW_SUBMETRIC_PER_CYCLE_IN_REGION = 13,
809
+ NVPW_SUBMETRIC_PER_SECOND = 14,
810
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE = 15,
811
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED = 16,
812
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME = 17,
813
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION = 18,
814
+ NVPW_SUBMETRIC_MAX_RATE = 19,
815
+ NVPW_SUBMETRIC_PCT = 20,
816
+ NVPW_SUBMETRIC_RATIO = 21,
817
+ NVPW_SUBMETRIC__COUNT
818
+ } NVPW_Submetric;
819
+
820
+ typedef struct NVPW_MetricEvalRequest
821
+ {
822
+ /// the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
823
+ size_t metricIndex;
824
+ /// one of 'NVPW_MetricType'
825
+ uint8_t metricType;
826
+ /// one of 'NVPW_RollupOp', required for Counter and Throughput, doesn't apply to Ratio
827
+ uint8_t rollupOp;
828
+ /// one of 'NVPW_Submetric', required for Ratio and Throughput, optional for Counter
829
+ uint16_t submetric;
830
+ } NVPW_MetricEvalRequest;
831
+ #define NVPW_MetricEvalRequest_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricEvalRequest, submetric)
832
+
833
+ typedef struct NVPW_DimUnitFactor
834
+ {
835
+ /// one of 'NVPW_DimUnitName'
836
+ uint32_t dimUnit;
837
+ int8_t exponent;
838
+ } NVPW_DimUnitFactor;
839
+ #define NVPW_DimUnitFactor_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_DimUnitFactor, exponent)
840
+
841
+ typedef struct NVPW_MetricsEvaluator_Destroy_Params
842
+ {
843
+ /// [in]
844
+ size_t structSize;
845
+ /// [in] assign to NULL
846
+ void* pPriv;
847
+ /// [in]
848
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
849
+ } NVPW_MetricsEvaluator_Destroy_Params;
850
+ #define NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_Destroy_Params, pMetricsEvaluator)
851
+
852
+ NVPA_Status NVPW_MetricsEvaluator_Destroy(NVPW_MetricsEvaluator_Destroy_Params* pParams);
853
+
854
+ typedef struct NVPW_MetricsEvaluator_GetMetricNames_Params
855
+ {
856
+ /// [in]
857
+ size_t structSize;
858
+ /// [in] assign to NULL
859
+ void* pPriv;
860
+ /// [in]
861
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
862
+ /// [in] one of 'NVPW_MetricType'
863
+ uint8_t metricType;
864
+ /// [out]
865
+ const char* pMetricNames;
866
+ /// [out]
867
+ const size_t* pMetricNameBeginIndices;
868
+ /// [out]
869
+ size_t numMetrics;
870
+ } NVPW_MetricsEvaluator_GetMetricNames_Params;
871
+ #define NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricNames_Params, numMetrics)
872
+
873
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricNames(NVPW_MetricsEvaluator_GetMetricNames_Params* pParams);
874
+
875
+ typedef struct NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params
876
+ {
877
+ /// [in]
878
+ size_t structSize;
879
+ /// [in] assign to NULL
880
+ void* pPriv;
881
+ /// [in]
882
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
883
+ /// [in] can be either a base metric or a metric
884
+ const char* pMetricName;
885
+ /// [out] one of 'NVPW_MetricType'
886
+ uint8_t metricType;
887
+ /// [out] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
888
+ size_t metricIndex;
889
+ } NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params;
890
+ #define NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params, metricIndex)
891
+
892
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricTypeAndIndex(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params* pParams);
893
+
894
+ typedef struct NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params
895
+ {
896
+ /// [in]
897
+ size_t structSize;
898
+ /// [in] assign to NULL
899
+ void* pPriv;
900
+ /// [in]
901
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
902
+ /// [in]
903
+ const char* pMetricName;
904
+ /// [inout] 'pMetricEvalRequest' is in, '*pMetricEvalRequest' is out
905
+ struct NVPW_MetricEvalRequest* pMetricEvalRequest;
906
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
907
+ size_t metricEvalRequestStructSize;
908
+ } NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params;
909
+ #define NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params, metricEvalRequestStructSize)
910
+
911
+ NVPA_Status NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params* pParams);
912
+
913
+ typedef struct NVPW_MetricsEvaluator_HwUnitToString_Params
914
+ {
915
+ /// [in]
916
+ size_t structSize;
917
+ /// [in] assign to NULL
918
+ void* pPriv;
919
+ /// [in]
920
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
921
+ /// [in] one of 'NVPW_HwUnit'
922
+ uint32_t hwUnit;
923
+ /// [out]
924
+ const char* pHwUnitName;
925
+ } NVPW_MetricsEvaluator_HwUnitToString_Params;
926
+ #define NVPW_MetricsEvaluator_HwUnitToString_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_HwUnitToString_Params, pHwUnitName)
927
+
928
+ NVPA_Status NVPW_MetricsEvaluator_HwUnitToString(NVPW_MetricsEvaluator_HwUnitToString_Params* pParams);
929
+
930
+ typedef struct NVPW_MetricsEvaluator_GetCounterProperties_Params
931
+ {
932
+ /// [in]
933
+ size_t structSize;
934
+ /// [in] assign to NULL
935
+ void* pPriv;
936
+ /// [in]
937
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
938
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
939
+ size_t counterIndex;
940
+ /// [out]
941
+ const char* pDescription;
942
+ /// [out] one of 'NVPW_HwUnit'
943
+ uint32_t hwUnit;
944
+ } NVPW_MetricsEvaluator_GetCounterProperties_Params;
945
+ #define NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetCounterProperties_Params, hwUnit)
946
+
947
+ NVPA_Status NVPW_MetricsEvaluator_GetCounterProperties(NVPW_MetricsEvaluator_GetCounterProperties_Params* pParams);
948
+
949
+ typedef struct NVPW_MetricsEvaluator_GetRatioMetricProperties_Params
950
+ {
951
+ /// [in]
952
+ size_t structSize;
953
+ /// [in] assign to NULL
954
+ void* pPriv;
955
+ /// [in]
956
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
957
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
958
+ size_t ratioMetricIndex;
959
+ /// [out]
960
+ const char* pDescription;
961
+ /// [out]
962
+ uint64_t hwUnit;
963
+ } NVPW_MetricsEvaluator_GetRatioMetricProperties_Params;
964
+ #define NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params, hwUnit)
965
+
966
+ NVPA_Status NVPW_MetricsEvaluator_GetRatioMetricProperties(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params* pParams);
967
+
968
+ typedef struct NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params
969
+ {
970
+ /// [in]
971
+ size_t structSize;
972
+ /// [in] assign to NULL
973
+ void* pPriv;
974
+ /// [in]
975
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
976
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
977
+ size_t throughputMetricIndex;
978
+ /// [out]
979
+ const char* pDescription;
980
+ /// [out]
981
+ uint32_t hwUnit;
982
+ /// [out] number of constituent counters for the throughput metric
983
+ size_t numCounters;
984
+ /// [out] metric indices as in 'NVPW_MetricsEvaluator_GetMetricNames', valid if 'numCounters' > 0, otherwise
985
+ /// returned as nullptr
986
+ const size_t* pCounterIndices;
987
+ /// [out] number of constituent sub-throughputs for the throughput metric
988
+ size_t numSubThroughputs;
989
+ /// [out] metric indices as in 'NVPW_MetricsEvaluator_GetMetricNames', valid if 'numSubThroughputs' > 0,
990
+ /// otherwise returned as nullptr
991
+ const size_t* pSubThroughputIndices;
992
+ } NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params;
993
+ #define NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params, pSubThroughputIndices)
994
+
995
+ NVPA_Status NVPW_MetricsEvaluator_GetThroughputMetricProperties(NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params* pParams);
996
+
997
+ typedef struct NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params
998
+ {
999
+ /// [in]
1000
+ size_t structSize;
1001
+ /// [in] assign to NULL
1002
+ void* pPriv;
1003
+ /// [in]
1004
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1005
+ /// [in] one of 'NVPW_MetricType'
1006
+ uint8_t metricType;
1007
+ /// [out] an array of 'NVPW_Submetric'
1008
+ const uint16_t* pSupportedSubmetrics;
1009
+ /// [out]
1010
+ size_t numSupportedSubmetrics;
1011
+ } NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params;
1012
+ #define NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params, numSupportedSubmetrics)
1013
+
1014
+ NVPA_Status NVPW_MetricsEvaluator_GetSupportedSubmetrics(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params* pParams);
1015
+
1016
+ typedef struct NVPW_MetricsEvaluator_GetMetricRawDependencies_Params
1017
+ {
1018
+ /// [in]
1019
+ size_t structSize;
1020
+ /// [in] assign to NULL
1021
+ void* pPriv;
1022
+ /// [in]
1023
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1024
+ /// [in]
1025
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequests;
1026
+ /// [in]
1027
+ size_t numMetricEvalRequests;
1028
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1029
+ size_t metricEvalRequestStructSize;
1030
+ /// [in] set to sizeof('NVPW_MetricEvalRequest')
1031
+ size_t metricEvalRequestStrideSize;
1032
+ /// [inout] 'ppRawDependencies' is in, '*ppRawDependencies' is out
1033
+ const char** ppRawDependencies;
1034
+ /// [inout] if 'ppRawDependencies' is NULL, number of raw dependencies available will be returned; otherwise it
1035
+ /// should be set to the number of elements allocated for 'ppRawDependencies', and on return, it will be
1036
+ /// overwritten by number of elements copied to 'ppRawDependencies'
1037
+ size_t numRawDependencies;
1038
+ /// [inout] 'ppOptionalRawDependencies' is in, '*ppOptionalRawDependencies' is out
1039
+ const char** ppOptionalRawDependencies;
1040
+ /// [inout] if 'ppOptionalRawDependencies' is NULL, number of optional raw dependencies available will be
1041
+ /// returned; otherwise it should be set to the number of elements allocated for 'ppOptionalRawDependencies',
1042
+ /// and on return, it will be overwritten by number of elements copied to 'ppOptionalRawDependencies'
1043
+ size_t numOptionalRawDependencies;
1044
+ } NVPW_MetricsEvaluator_GetMetricRawDependencies_Params;
1045
+ #define NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricRawDependencies_Params, numOptionalRawDependencies)
1046
+
1047
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricRawDependencies(NVPW_MetricsEvaluator_GetMetricRawDependencies_Params* pParams);
1048
+
1049
+ typedef struct NVPW_MetricsEvaluator_DimUnitToString_Params
1050
+ {
1051
+ /// [in]
1052
+ size_t structSize;
1053
+ /// [in] assign to NULL
1054
+ void* pPriv;
1055
+ /// [in]
1056
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1057
+ /// [in] one of 'NVPW_DimUnitName'
1058
+ uint32_t dimUnit;
1059
+ /// [out]
1060
+ const char* pSingularName;
1061
+ /// [out]
1062
+ const char* pPluralName;
1063
+ } NVPW_MetricsEvaluator_DimUnitToString_Params;
1064
+ #define NVPW_MetricsEvaluator_DimUnitToString_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_DimUnitToString_Params, pPluralName)
1065
+
1066
+ NVPA_Status NVPW_MetricsEvaluator_DimUnitToString(NVPW_MetricsEvaluator_DimUnitToString_Params* pParams);
1067
+
1068
+ typedef struct NVPW_MetricsEvaluator_GetMetricDimUnits_Params
1069
+ {
1070
+ /// [in]
1071
+ size_t structSize;
1072
+ /// [in] assign to NULL
1073
+ void* pPriv;
1074
+ /// [in]
1075
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1076
+ /// [in]
1077
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequest;
1078
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1079
+ size_t metricEvalRequestStructSize;
1080
+ /// [inout] 'pDimUnits' is in, '*pDimUnits' is out
1081
+ NVPW_DimUnitFactor* pDimUnits;
1082
+ /// [inout] if 'pDimUnits' is NULL, number of dim-units available will be returned; otherwise it should be set
1083
+ /// to the number of elements allocated for 'pDimUnits', and on return, it will be overwritten by number of
1084
+ /// elements copied to 'pDimUnits'
1085
+ size_t numDimUnits;
1086
+ /// [in] set to 'NVPW_DimUnitFactor_STRUCT_SIZE'
1087
+ size_t dimUnitFactorStructSize;
1088
+ } NVPW_MetricsEvaluator_GetMetricDimUnits_Params;
1089
+ #define NVPW_MetricsEvaluator_GetMetricDimUnits_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricDimUnits_Params, dimUnitFactorStructSize)
1090
+
1091
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricDimUnits(NVPW_MetricsEvaluator_GetMetricDimUnits_Params* pParams);
1092
+
1093
+ typedef struct NVPW_MetricsEvaluator_SetUserData_Params
1094
+ {
1095
+ /// [in]
1096
+ size_t structSize;
1097
+ /// [in] assign to NULL
1098
+ void* pPriv;
1099
+ /// [in]
1100
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1101
+ /// [in] duration in ns of user-defined frame
1102
+ double frameDuration;
1103
+ /// [in] duration in ns of user-defined region
1104
+ double regionDuration;
1105
+ /// [in]
1106
+ NVPA_Bool isolated;
1107
+ } NVPW_MetricsEvaluator_SetUserData_Params;
1108
+ #define NVPW_MetricsEvaluator_SetUserData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_SetUserData_Params, isolated)
1109
+
1110
+ NVPA_Status NVPW_MetricsEvaluator_SetUserData(NVPW_MetricsEvaluator_SetUserData_Params* pParams);
1111
+
1112
+ typedef struct NVPW_MetricsEvaluator_EvaluateToGpuValues_Params
1113
+ {
1114
+ /// [in]
1115
+ size_t structSize;
1116
+ /// [in] assign to NULL
1117
+ void* pPriv;
1118
+ /// [in]
1119
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1120
+ /// [in]
1121
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequests;
1122
+ /// [in]
1123
+ size_t numMetricEvalRequests;
1124
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1125
+ size_t metricEvalRequestStructSize;
1126
+ /// [in] set to sizeof('NVPW_MetricEvalRequest')
1127
+ size_t metricEvalRequestStrideSize;
1128
+ /// [in]
1129
+ const uint8_t* pCounterDataImage;
1130
+ /// [in]
1131
+ size_t counterDataImageSize;
1132
+ /// [in]
1133
+ size_t rangeIndex;
1134
+ /// [in]
1135
+ NVPA_Bool isolated;
1136
+ /// [inout] 'pMetricValues' is in, '*pMetricValues' is out
1137
+ double* pMetricValues;
1138
+ } NVPW_MetricsEvaluator_EvaluateToGpuValues_Params;
1139
+ #define NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_EvaluateToGpuValues_Params, pMetricValues)
1140
+
1141
+ NVPA_Status NVPW_MetricsEvaluator_EvaluateToGpuValues(NVPW_MetricsEvaluator_EvaluateToGpuValues_Params* pParams);
1142
+
1143
+ typedef struct NVPW_MetricsEvaluator_SetDeviceAttributes_Params
1144
+ {
1145
+ /// [in]
1146
+ size_t structSize;
1147
+ /// [in] assign to NULL
1148
+ void* pPriv;
1149
+ /// [in]
1150
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1151
+ /// [in]
1152
+ const uint8_t* pCounterDataImage;
1153
+ /// [in]
1154
+ size_t counterDataImageSize;
1155
+ } NVPW_MetricsEvaluator_SetDeviceAttributes_Params;
1156
+ #define NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_SetDeviceAttributes_Params, counterDataImageSize)
1157
+
1158
+ NVPA_Status NVPW_MetricsEvaluator_SetDeviceAttributes(NVPW_MetricsEvaluator_SetDeviceAttributes_Params* pParams);
1159
+
1160
+ /**
1161
+ * @}
1162
+ ******************************************************************************/
1163
+
1164
+
1165
+ #endif // NVPERF_HOST_API_DEFINED
1166
+
1167
+
1168
+
1169
+
1170
+ #ifdef __cplusplus
1171
+ } // extern "C"
1172
+ #endif
1173
+
1174
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
1175
+ #pragma GCC visibility pop
1176
+ #endif
1177
+
1178
+ #endif // NVPERF_HOST_H
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/include/nvperf_target.h ADDED
@@ -0,0 +1,626 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_TARGET_H
2
+ #define NVPERF_TARGET_H
3
+
4
+ /*
5
+ * Copyright 2014-2024 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+
45
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
46
+ #pragma GCC visibility push(default)
47
+ #if !defined(NVPW_LOCAL)
48
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
49
+ #endif
50
+ #else
51
+ #if !defined(NVPW_LOCAL)
52
+ #define NVPW_LOCAL
53
+ #endif
54
+ #endif
55
+
56
+ #ifdef __cplusplus
57
+ extern "C" {
58
+ #endif
59
+
60
+ /**
61
+ * @file nvperf_target.h
62
+ */
63
+
64
+ #ifndef NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED
65
+ #define NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED
66
+ /// GPU architecture support level
67
+ typedef enum NVPW_GpuArchitectureSupportLevel
68
+ {
69
+ NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_UNKNOWN = 0,
70
+ NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_UNSUPPORTED,
71
+ NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED
72
+ } NVPW_GpuArchitectureSupportLevel;
73
+ #endif //NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED
74
+
75
+ #ifndef NVPW_SLI_SUPPORT_LEVEL_DEFINED
76
+ #define NVPW_SLI_SUPPORT_LEVEL_DEFINED
77
+ /// SLI configuration support level
78
+ typedef enum NVPW_SliSupportLevel
79
+ {
80
+ NVPW_SLI_SUPPORT_LEVEL_UNKNOWN = 0,
81
+ NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED,
82
+ /// Only Non-SLI configurations are supported.
83
+ NVPW_SLI_SUPPORT_LEVEL_SUPPORTED_NON_SLI_CONFIGURATION
84
+ } NVPW_SliSupportLevel;
85
+ #endif //NVPW_SLI_SUPPORT_LEVEL_DEFINED
86
+
87
+ #ifndef NVPW_VGPU_SUPPORT_LEVEL_DEFINED
88
+ #define NVPW_VGPU_SUPPORT_LEVEL_DEFINED
89
+ /// Virtualized GPU configuration support level
90
+ typedef enum NVPW_VGpuSupportLevel
91
+ {
92
+ NVPW_VGPU_SUPPORT_LEVEL_UNKNOWN = 0,
93
+ NVPW_VGPU_SUPPORT_LEVEL_UNSUPPORTED,
94
+ /// Supported but not allowed by system admin.
95
+ NVPW_VGPU_SUPPORT_LEVEL_SUPPORTED_DISALLOWED,
96
+ NVPW_VGPU_SUPPORT_LEVEL_SUPPORTED_ALLOWED,
97
+ NVPW_VGPU_SUPPORT_LEVEL_SUPPORTED_NON_VGPU_CONFIGURATION
98
+ } NVPW_VGpuSupportLevel;
99
+ #endif //NVPW_VGPU_SUPPORT_LEVEL_DEFINED
100
+
101
+ #ifndef NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED
102
+ #define NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED
103
+ /// Confidential Compute mode support level
104
+ typedef enum NVPW_ConfidentialComputeSupportLevel
105
+ {
106
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_UNKNOWN = 0,
107
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_UNSUPPORTED,
108
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_SUPPORTED_NON_CONF_COMPUTE_CONFIGURATION,
109
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_SUPPORTED_CONF_COMPUTE_DEVTOOLS_MODE
110
+ } NVPW_ConfidentialComputeSupportLevel;
111
+ #endif //NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED
112
+
113
+ #ifndef NVPW_CMP_SUPPORT_LEVEL_DEFINED
114
+ #define NVPW_CMP_SUPPORT_LEVEL_DEFINED
115
+ /// CMP support level
116
+ typedef enum NVPW_CmpSupportLevel
117
+ {
118
+ NVPW_CMP_SUPPORT_LEVEL_UNKNOWN = 0,
119
+ NVPW_CMP_SUPPORT_LEVEL_UNSUPPORTED,
120
+ NVPW_CMP_SUPPORT_LEVEL_SUPPORTED_NON_CMP_CONFIGURATON
121
+ } NVPW_CmpSupportLevel;
122
+ #endif //NVPW_CMP_SUPPORT_LEVEL_DEFINED
123
+
124
+ #ifndef NVPW_WSL_SUPPORT_LEVEL_DEFINED
125
+ #define NVPW_WSL_SUPPORT_LEVEL_DEFINED
126
+ /// WSL support level
127
+ typedef enum NVPW_WslSupportLevel
128
+ {
129
+ NVPW_WSL_SUPPORT_LEVEL_UNKNOWN = 0,
130
+ NVPW_WSL_SUPPORT_LEVEL_UNSUPPORTED_INSUFFICIENT_DRIVER_VERSION,
131
+ NVPW_WSL_SUPPORT_LEVEL_SUPPORTED,
132
+ NVPW_WSL_SUPPORT_LEVEL_SUPPORTED_NON_WSL_CONFIGURATION
133
+ } NVPW_WslSupportLevel;
134
+ #endif //NVPW_WSL_SUPPORT_LEVEL_DEFINED
135
+
136
+ #ifndef NVPW_MIG_SUPPORT_LEVEL_DEFINED
137
+ #define NVPW_MIG_SUPPORT_LEVEL_DEFINED
138
+ /// MIG support level
139
+ typedef enum NVPW_MigSupportLevel
140
+ {
141
+ NVPW_MIG_SUPPORT_LEVEL_UNKNOWN = 0,
142
+ NVPW_MIG_SUPPORT_LEVEL_UNSUPPORTED,
143
+ NVPW_MIG_SUPPORT_LEVEL_SUPPORTED,
144
+ NVPW_MIG_SUPPORT_LEVEL_SUPPORTED_NON_MIG_CONFIGURATION
145
+ } NVPW_MigSupportLevel;
146
+ #endif //NVPW_MIG_SUPPORT_LEVEL_DEFINED
147
+
148
+ typedef struct NVPW_InitializeTarget_Params
149
+ {
150
+ /// [in]
151
+ size_t structSize;
152
+ /// [in] assign to NULL
153
+ void* pPriv;
154
+ } NVPW_InitializeTarget_Params;
155
+ #define NVPW_InitializeTarget_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_InitializeTarget_Params, pPriv)
156
+
157
+ /// Load the target library.
158
+ NVPA_Status NVPW_InitializeTarget(NVPW_InitializeTarget_Params* pParams);
159
+
160
+ typedef struct NVPW_GetDeviceCount_Params
161
+ {
162
+ /// [in]
163
+ size_t structSize;
164
+ /// [in] assign to NULL
165
+ void* pPriv;
166
+ size_t numDevices;
167
+ } NVPW_GetDeviceCount_Params;
168
+ #define NVPW_GetDeviceCount_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_GetDeviceCount_Params, numDevices)
169
+
170
+ NVPA_Status NVPW_GetDeviceCount(NVPW_GetDeviceCount_Params* pParams);
171
+
172
+ typedef struct NVPW_Device_GetNames_Params
173
+ {
174
+ /// [in]
175
+ size_t structSize;
176
+ /// [in] assign to NULL
177
+ void* pPriv;
178
+ size_t deviceIndex;
179
+ const char* pDeviceName;
180
+ const char* pChipName;
181
+ } NVPW_Device_GetNames_Params;
182
+ #define NVPW_Device_GetNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetNames_Params, pChipName)
183
+
184
+ NVPA_Status NVPW_Device_GetNames(NVPW_Device_GetNames_Params* pParams);
185
+
186
+ typedef struct NVPW_PciBusId
187
+ {
188
+ /// The PCI domain on which the device bus resides.
189
+ uint32_t domain;
190
+ /// The bus on which the device resides.
191
+ uint16_t bus;
192
+ /// device ID.
193
+ uint16_t device;
194
+ } NVPW_PciBusId;
195
+ #define NVPW_PciBusId_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PciBusId, device)
196
+
197
+ typedef struct NVPW_Device_GetPciBusIds_Params
198
+ {
199
+ /// [in]
200
+ size_t structSize;
201
+ /// [in] assign to NULL
202
+ void* pPriv;
203
+ /// [in] caller-allocated array of NVPW_PciBusId, indexed by NVPW deviceIndex
204
+ NVPW_PciBusId* pBusIds;
205
+ /// [in] size of the pBusIds array; use result from NVPW_GetDeviceCount
206
+ size_t numDevices;
207
+ } NVPW_Device_GetPciBusIds_Params;
208
+ #define NVPW_Device_GetPciBusIds_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetPciBusIds_Params, numDevices)
209
+
210
+ NVPA_Status NVPW_Device_GetPciBusIds(NVPW_Device_GetPciBusIds_Params* pParams);
211
+
212
+
213
+ #define NVPW_DEVICE_MIG_GPU_INSTANCE_ID_INVALID 0xFFFFFFFFu
214
+ #define NVPW_DEVICE_MIG_GPU_INSTANCE_ID_FULLCHIP 0xFFFFFFFEu
215
+
216
+
217
+ typedef struct NVPW_Device_GetMigAttributes_Params
218
+ {
219
+ /// [in]
220
+ size_t structSize;
221
+ /// [in] assign to NULL
222
+ void* pPriv;
223
+ /// [in]
224
+ size_t deviceIndex;
225
+ /// [out]
226
+ NVPA_Bool isMigPartition;
227
+ /// [out]
228
+ uint32_t gpuInstanceId;
229
+ /// [out]
230
+ uint32_t computeInstanceId;
231
+ } NVPW_Device_GetMigAttributes_Params;
232
+ #define NVPW_Device_GetMigAttributes_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetMigAttributes_Params, computeInstanceId)
233
+
234
+ NVPA_Status NVPW_Device_GetMigAttributes(NVPW_Device_GetMigAttributes_Params* pParams);
235
+
236
+ typedef struct NVPW_Adapter_GetDeviceIndex_Params
237
+ {
238
+ /// [in]
239
+ size_t structSize;
240
+ /// [in] assign to NULL
241
+ void* pPriv;
242
+ /// [in]
243
+ struct IDXGIAdapter* pAdapter;
244
+ /// [in]
245
+ size_t sliIndex;
246
+ /// [out]
247
+ size_t deviceIndex;
248
+ } NVPW_Adapter_GetDeviceIndex_Params;
249
+ #define NVPW_Adapter_GetDeviceIndex_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Adapter_GetDeviceIndex_Params, deviceIndex)
250
+
251
+ NVPA_Status NVPW_Adapter_GetDeviceIndex(NVPW_Adapter_GetDeviceIndex_Params* pParams);
252
+
253
+ typedef struct NVPW_CounterData_GetNumRanges_Params
254
+ {
255
+ /// [in]
256
+ size_t structSize;
257
+ /// [in] assign to NULL
258
+ void* pPriv;
259
+ const uint8_t* pCounterDataImage;
260
+ size_t numRanges;
261
+ } NVPW_CounterData_GetNumRanges_Params;
262
+ #define NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_GetNumRanges_Params, numRanges)
263
+
264
+ NVPA_Status NVPW_CounterData_GetNumRanges(NVPW_CounterData_GetNumRanges_Params* pParams);
265
+
266
+ typedef struct NVPW_CounterData_GetChipName_Params
267
+ {
268
+ /// [in]
269
+ size_t structSize;
270
+ /// [in] assign to NULL
271
+ void* pPriv;
272
+ /// [in]
273
+ const uint8_t* pCounterDataImage;
274
+ /// [in]
275
+ size_t counterDataImageSize;
276
+ /// [out]
277
+ const char* pChipName;
278
+ } NVPW_CounterData_GetChipName_Params;
279
+ #define NVPW_CounterData_GetChipName_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_GetChipName_Params, pChipName)
280
+
281
+ NVPA_Status NVPW_CounterData_GetChipName(NVPW_CounterData_GetChipName_Params* pParams);
282
+
283
+ typedef struct NVPW_Config_GetNumPasses_Params
284
+ {
285
+ /// [in]
286
+ size_t structSize;
287
+ /// [in] assign to NULL
288
+ void* pPriv;
289
+ /// [in]
290
+ const uint8_t* pConfig;
291
+ /// [out]
292
+ size_t numPipelinedPasses;
293
+ /// [out]
294
+ size_t numIsolatedPasses;
295
+ } NVPW_Config_GetNumPasses_Params;
296
+ #define NVPW_Config_GetNumPasses_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Config_GetNumPasses_Params, numIsolatedPasses)
297
+
298
+ /// Total num passes = numPipelinedPasses + numIsolatedPasses * numNestingLevels
299
+ NVPA_Status NVPW_Config_GetNumPasses(NVPW_Config_GetNumPasses_Params* pParams);
300
+
301
+ typedef struct NVPW_Config_GetNumPasses_V2_Params
302
+ {
303
+ /// [in]
304
+ size_t structSize;
305
+ /// [in] assign to NULL
306
+ void* pPriv;
307
+ /// [in]
308
+ const uint8_t* pConfig;
309
+ /// [out]
310
+ size_t numPasses;
311
+ } NVPW_Config_GetNumPasses_V2_Params;
312
+ #define NVPW_Config_GetNumPasses_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Config_GetNumPasses_V2_Params, numPasses)
313
+
314
+ /// Total num passes = numPasses * numNestingLevels
315
+ NVPA_Status NVPW_Config_GetNumPasses_V2(NVPW_Config_GetNumPasses_V2_Params* pParams);
316
+
317
+ #define NVPW_API_SET_CUDA_PROFILER 0x18209d0775b2f89dULL
318
+
319
+ #define NVPW_API_SET_D3D11_PROFILER 0xca55c6738445db2bULL
320
+
321
+ #define NVPW_API_SET_D3D12_PROFILER 0xc0c2d46dd7c7ad78ULL
322
+
323
+ #define NVPW_API_SET_EGL_PROFILER 0x3c3747dae1f9565cULL
324
+
325
+ #define NVPW_API_SET_GPU_PERIODICSAMPLER 0x9f4c2571fc0b2e8aULL
326
+
327
+ #define NVPW_API_SET_METRICSEVALUATOR 0x0368a8768d811af9ULL
328
+
329
+ #define NVPW_API_SET_METRICS_AD10X_COMP 0xbe57278e12cb5288ULL
330
+
331
+ #define NVPW_API_SET_METRICS_AD10X_GRFX 0x5cbf0774f81bf491ULL
332
+
333
+ #define NVPW_API_SET_METRICS_GA100_COMP 0x16b7d8c20d8b4915ULL
334
+
335
+ #define NVPW_API_SET_METRICS_GA100_GRFX 0xc94eaabec04a94faULL
336
+
337
+ #define NVPW_API_SET_METRICS_GA10X_COMP 0xb5d6391c2e299ab5ULL
338
+
339
+ #define NVPW_API_SET_METRICS_GA10X_GRFX 0x6ebc121178b5ce0bULL
340
+
341
+ #define NVPW_API_SET_METRICS_GV100_COMP 0x863705cc57919f72ULL
342
+
343
+ #define NVPW_API_SET_METRICS_GV100_GRFX 0x9900da75d164fecfULL
344
+
345
+ #define NVPW_API_SET_METRICS_GV11B_COMP 0xd3f79a859235848fULL
346
+
347
+ #define NVPW_API_SET_METRICS_GV11B_GRFX 0xeb8e26220106e227ULL
348
+
349
+ #define NVPW_API_SET_METRICS_TU10X_COMP 0x70f40be0afd35da8ULL
350
+
351
+ #define NVPW_API_SET_METRICS_TU10X_GRFX 0xdf219cb838db6968ULL
352
+
353
+ #define NVPW_API_SET_METRICS_TU11X_COMP 0xeb0069d7d0956678ULL
354
+
355
+ #define NVPW_API_SET_METRICS_TU11X_GRFX 0x0977d9342bd62743ULL
356
+
357
+ #define NVPW_API_SET_OPENGL_PROFILER 0xe4cd9ea40f2ee777ULL
358
+
359
+ #define NVPW_API_SET_VULKAN_PROFILER 0x8c56b6a03d779689ULL
360
+
361
+ #define NVPW_SDK_VERSION 0x1e128b6f001423fcULL
362
+
363
+ typedef struct NVPW_QueryVersionNumber_Params
364
+ {
365
+ /// [in]
366
+ size_t structSize;
367
+ /// [in] assign to NULL
368
+ void* pPriv;
369
+ /// [in]
370
+ uint64_t apiSet;
371
+ /// [out]
372
+ uint32_t major;
373
+ /// [out]
374
+ uint32_t minor;
375
+ /// [out]
376
+ uint32_t patch;
377
+ /// [out]
378
+ uint32_t relMajor;
379
+ /// [out]
380
+ uint32_t relMinor;
381
+ /// [out]
382
+ uint32_t relPatch;
383
+ } NVPW_QueryVersionNumber_Params;
384
+ #define NVPW_QueryVersionNumber_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_QueryVersionNumber_Params, relPatch)
385
+
386
+ /// Query version number of an API set
387
+ NVPA_Status NVPW_QueryVersionNumber(NVPW_QueryVersionNumber_Params* pParams);
388
+
389
+ typedef enum NVPW_Device_ClockStatus
390
+ {
391
+ /// clock status is unknown
392
+ NVPW_DEVICE_CLOCK_STATUS_UNKNOWN,
393
+ /// clocks are locked to rated tdp values - Deprecated, use NVPW_DEVICE_CLOCK_STATUS_LOCKED instead
394
+ NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP,
395
+ /// clocks are not locked and can boost above rated tdp
396
+ NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED,
397
+ /// clocks are not locked and will not go above rated tdp
398
+ NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED,
399
+ /// clocks are locked
400
+ NVPW_DEVICE_CLOCK_STATUS_LOCKED,
401
+ /// clocks are not locked
402
+ NVPW_DEVICE_CLOCK_STATUS_UNLOCKED,
403
+ NVPW_DEVICE_CLOCK_STATUS__COUNT
404
+ } NVPW_Device_ClockStatus;
405
+
406
+ typedef enum NVPW_Device_ClockLevel
407
+ {
408
+ /// clock level is invalid
409
+ NVPW_DEVICE_CLOCK_LEVEL_INVALID,
410
+ /// clock level is at rated tdp
411
+ NVPW_DEVICE_CLOCK_LEVEL_RATED_TDP,
412
+ /// clock level is at turbo boost
413
+ NVPW_DEVICE_CLOCK_LEVEL_TURBO_BOOST,
414
+ NVPW_DEVICE_CLOCK_LEVEL__COUNT
415
+ } NVPW_Device_ClockLevel;
416
+
417
+ typedef struct NVPW_Device_GetClockStatus_Params
418
+ {
419
+ /// [in]
420
+ size_t structSize;
421
+ /// [in] assign to NULL
422
+ void* pPriv;
423
+ size_t deviceIndex;
424
+ /// [in]
425
+ NVPW_Device_ClockStatus clockStatus;
426
+ /// [in]
427
+ NVPW_Device_ClockLevel clockLevel;
428
+ } NVPW_Device_GetClockStatus_Params;
429
+ #define NVPW_Device_GetClockStatus_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetClockStatus_Params, clockLevel)
430
+
431
+ NVPA_Status NVPW_Device_GetClockStatus(NVPW_Device_GetClockStatus_Params* pParams);
432
+
433
+ typedef enum NVPW_Device_ClockSetting
434
+ {
435
+ /// invalid op, specify valid clocks operation during profiling
436
+ NVPW_DEVICE_CLOCK_SETTING_INVALID,
437
+ /// default to driver/application config (normally unlocked and not boosted, but could be unlocked boosted, or
438
+ /// locked to rated TDP)
439
+ NVPW_DEVICE_CLOCK_SETTING_DEFAULT,
440
+ /// lock clocks at rated tdp base values
441
+ NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP,
442
+ /// lock clocks at turbo boost values
443
+ NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_TURBO_BOOST,
444
+ NVPW_DEVICE_CLOCK_SETTING__COUNT
445
+ } NVPW_Device_ClockSetting;
446
+
447
+ typedef struct NVPW_Device_SetClockSetting_Params
448
+ {
449
+ /// [in]
450
+ size_t structSize;
451
+ /// [in] assign to NULL
452
+ void* pPriv;
453
+ size_t deviceIndex;
454
+ /// [in]
455
+ NVPW_Device_ClockSetting clockSetting;
456
+ } NVPW_Device_SetClockSetting_Params;
457
+ #define NVPW_Device_SetClockSetting_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_SetClockSetting_Params, clockSetting)
458
+
459
+ NVPA_Status NVPW_Device_SetClockSetting(NVPW_Device_SetClockSetting_Params* pParams);
460
+
461
+ typedef struct NVPW_CounterData_GetRangeDescriptions_Params
462
+ {
463
+ /// [in]
464
+ size_t structSize;
465
+ /// [in] assign to NULL
466
+ void* pPriv;
467
+ const uint8_t* pCounterDataImage;
468
+ size_t rangeIndex;
469
+ /// [inout] Number of descriptions allocated in ppDescriptions
470
+ size_t numDescriptions;
471
+ const char** ppDescriptions;
472
+ } NVPW_CounterData_GetRangeDescriptions_Params;
473
+ #define NVPW_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_GetRangeDescriptions_Params, ppDescriptions)
474
+
475
+ NVPA_Status NVPW_CounterData_GetRangeDescriptions(NVPW_CounterData_GetRangeDescriptions_Params* pParams);
476
+
477
+ typedef struct NVPW_Profiler_CounterData_GetRangeDescriptions_Params
478
+ {
479
+ /// [in]
480
+ size_t structSize;
481
+ /// [in] assign to NULL
482
+ void* pPriv;
483
+ const uint8_t* pCounterDataImage;
484
+ size_t rangeIndex;
485
+ /// [inout] Number of descriptions allocated in ppDescriptions
486
+ size_t numDescriptions;
487
+ const char** ppDescriptions;
488
+ } NVPW_Profiler_CounterData_GetRangeDescriptions_Params;
489
+ #define NVPW_Profiler_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Profiler_CounterData_GetRangeDescriptions_Params, ppDescriptions)
490
+
491
+ NVPA_Status NVPW_Profiler_CounterData_GetRangeDescriptions(NVPW_Profiler_CounterData_GetRangeDescriptions_Params* pParams);
492
+
493
+ #ifndef NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_DEFINED
494
+ #define NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_DEFINED
495
+ typedef enum NVPW_PeriodicSampler_CounterData_AppendMode
496
+ {
497
+ NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_LINEAR = 0,
498
+ NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_CIRCULAR = 1,
499
+ NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE__COUNT
500
+ } NVPW_PeriodicSampler_CounterData_AppendMode;
501
+ #endif //NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_DEFINED
502
+
503
+ typedef struct NVPW_PeriodicSampler_CounterData_GetSampleTime_Params
504
+ {
505
+ /// [in]
506
+ size_t structSize;
507
+ /// [in] assign to NULL
508
+ void* pPriv;
509
+ /// [in]
510
+ const uint8_t* pCounterDataImage;
511
+ /// [in]
512
+ size_t rangeIndex;
513
+ /// [out]
514
+ uint64_t timestampStart;
515
+ /// [out]
516
+ uint64_t timestampEnd;
517
+ } NVPW_PeriodicSampler_CounterData_GetSampleTime_Params;
518
+ #define NVPW_PeriodicSampler_CounterData_GetSampleTime_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_GetSampleTime_Params, timestampEnd)
519
+
520
+ NVPA_Status NVPW_PeriodicSampler_CounterData_GetSampleTime(NVPW_PeriodicSampler_CounterData_GetSampleTime_Params* pParams);
521
+
522
+ typedef struct NVPW_PeriodicSampler_CounterData_TrimInPlace_Params
523
+ {
524
+ /// [in]
525
+ size_t structSize;
526
+ /// [in] assign to NULL
527
+ void* pPriv;
528
+ /// [in]
529
+ uint8_t* pCounterDataImage;
530
+ /// [in]
531
+ size_t counterDataImageSize;
532
+ /// [out]
533
+ size_t counterDataImageTrimmedSize;
534
+ } NVPW_PeriodicSampler_CounterData_TrimInPlace_Params;
535
+ #define NVPW_PeriodicSampler_CounterData_TrimInPlace_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_TrimInPlace_Params, counterDataImageTrimmedSize)
536
+
537
+ NVPA_Status NVPW_PeriodicSampler_CounterData_TrimInPlace(NVPW_PeriodicSampler_CounterData_TrimInPlace_Params* pParams);
538
+
539
+ typedef struct NVPW_PeriodicSampler_CounterData_GetInfo_Params
540
+ {
541
+ /// [in]
542
+ size_t structSize;
543
+ /// [in] assign to NULL
544
+ void* pPriv;
545
+ /// [in]
546
+ const uint8_t* pCounterDataImage;
547
+ /// [in]
548
+ size_t counterDataImageSize;
549
+ /// [out] total number of ranges in the counter data
550
+ size_t numTotalRanges;
551
+ /// [out] if in "linear" mode, this API returns the number of "populated" ranges; if it's in "circular" mode,
552
+ /// then it returns the last "populated" range index + 1, when there is no such range, it returns 0.
553
+ size_t numPopulatedRanges;
554
+ /// [out] if in "linear" mode, this API returns the number of "completed" ranges; if it's in "circular" mode,
555
+ /// then it returns the last "completed" range index + 1, when there is no such range, it returns 0.
556
+ size_t numCompletedRanges;
557
+ } NVPW_PeriodicSampler_CounterData_GetInfo_Params;
558
+ #define NVPW_PeriodicSampler_CounterData_GetInfo_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_GetInfo_Params, numCompletedRanges)
559
+
560
+ /// In periodic sampler, a range in counter data stores exactly one sample's data. For better performance, periodic
561
+ /// sampler may operate in an out-of-order fashion when populating sample data, i.e. it may not fully populate all
562
+ /// counters of a sample/range before starting to populate the next sample/range. As a result, we have two concepts
563
+ /// here, "populated" & "completed": a range is considered "populated" even if only partial counters have been
564
+ /// written; on the other hand, a range is only considered "completed" if all the collecting counters have been
565
+ /// written.
566
+ NVPA_Status NVPW_PeriodicSampler_CounterData_GetInfo(NVPW_PeriodicSampler_CounterData_GetInfo_Params* pParams);
567
+
568
+ typedef struct NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params
569
+ {
570
+ /// [in]
571
+ size_t structSize;
572
+ /// [in] assign to NULL
573
+ void* pPriv;
574
+ /// [in]
575
+ const uint8_t* pCounterDataImage;
576
+ /// [in]
577
+ size_t counterDataImageSize;
578
+ /// [in]
579
+ size_t rangeIndex;
580
+ /// [out]
581
+ uint32_t triggerCount;
582
+ } NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params;
583
+ #define NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params, triggerCount)
584
+
585
+ NVPA_Status NVPW_PeriodicSampler_CounterData_GetTriggerCount(NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params* pParams);
586
+
587
+ typedef struct NVPW_PeriodicSampler_CounterData_IsDataComplete_Params
588
+ {
589
+ /// [in]
590
+ size_t structSize;
591
+ /// [in] assign to NULL
592
+ void* pPriv;
593
+ /// [in]
594
+ const uint8_t* pCounterDataImage;
595
+ /// [in]
596
+ size_t counterDataImageSize;
597
+ /// [in]
598
+ size_t rangeIndex;
599
+ /// [out]
600
+ NVPA_Bool isComplete;
601
+ } NVPW_PeriodicSampler_CounterData_IsDataComplete_Params;
602
+ #define NVPW_PeriodicSampler_CounterData_IsDataComplete_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_IsDataComplete_Params, isComplete)
603
+
604
+ /// Checks whether a given sample's data is complete. See also 'NVPW_PeriodicSampler_CounterData_GetInfo'
605
+ NVPA_Status NVPW_PeriodicSampler_CounterData_IsDataComplete(NVPW_PeriodicSampler_CounterData_IsDataComplete_Params* pParams);
606
+
607
+
608
+ typedef struct NVPW_TimestampReport
609
+ {
610
+ uint32_t payload;
611
+ uint8_t reserved0004[4];
612
+ uint64_t timestamp;
613
+ } NVPW_TimestampReport;
614
+
615
+
616
+
617
+
618
+ #ifdef __cplusplus
619
+ } // extern "C"
620
+ #endif
621
+
622
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
623
+ #pragma GCC visibility pop
624
+ #endif
625
+
626
+ #endif // NVPERF_TARGET_H
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/lib/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cuda_cupti/lib/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (227 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (219 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/include/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/include/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (227 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /**
51
+ * @file cufile.h
52
+ * @brief cuFile C APIs
53
+ *
54
+ * This file contains all the C APIs to perform GPUDirect Storage supported IO operations
55
+ */
56
+
57
+ #ifdef __cplusplus
58
+ extern "C"
59
+ {
60
+ #endif
61
+
62
+ /// @cond DOXYGEN_SKIP_MACRO
63
+ #ifndef __CUFILE_H_
64
+ #define __CUFILE_H_
65
+
66
+ #include <stdlib.h>
67
+ #include <stdbool.h>
68
+
69
+ #include <cuda.h>
70
+ #include <arpa/inet.h>
71
+ #include <sys/socket.h>
72
+
73
+ #define CUFILEOP_BASE_ERR 5000
74
+
75
+ // Note: data path errors are captured via standard error codes
76
+ #define CUFILEOP_STATUS_ENTRIES \
77
+ CUFILE_OP(0, CU_FILE_SUCCESS, cufile success) \
78
+ CUFILE_OP(CUFILEOP_BASE_ERR + 1, CU_FILE_DRIVER_NOT_INITIALIZED, nvidia-fs driver is not loaded. Set allow_compat_mode to true in cufile.json file to enable compatible mode) \
79
+ CUFILE_OP(CUFILEOP_BASE_ERR + 2, CU_FILE_DRIVER_INVALID_PROPS, invalid property) \
80
+ CUFILE_OP(CUFILEOP_BASE_ERR + 3, CU_FILE_DRIVER_UNSUPPORTED_LIMIT, property range error) \
81
+ CUFILE_OP(CUFILEOP_BASE_ERR + 4, CU_FILE_DRIVER_VERSION_MISMATCH, nvidia-fs driver version mismatch) \
82
+ CUFILE_OP(CUFILEOP_BASE_ERR + 5, CU_FILE_DRIVER_VERSION_READ_ERROR, nvidia-fs driver version read error) \
83
+ CUFILE_OP(CUFILEOP_BASE_ERR + 6, CU_FILE_DRIVER_CLOSING, driver shutdown in progress) \
84
+ CUFILE_OP(CUFILEOP_BASE_ERR + 7, CU_FILE_PLATFORM_NOT_SUPPORTED, GPUDirect Storage not supported on current platform) \
85
+ CUFILE_OP(CUFILEOP_BASE_ERR + 8, CU_FILE_IO_NOT_SUPPORTED, GPUDirect Storage not supported on current file) \
86
+ CUFILE_OP(CUFILEOP_BASE_ERR + 9, CU_FILE_DEVICE_NOT_SUPPORTED, GPUDirect Storage not supported on current GPU) \
87
+ CUFILE_OP(CUFILEOP_BASE_ERR + 10, CU_FILE_NVFS_DRIVER_ERROR, nvidia-fs driver ioctl error) \
88
+ CUFILE_OP(CUFILEOP_BASE_ERR + 11, CU_FILE_CUDA_DRIVER_ERROR, CUDA Driver API error) \
89
+ CUFILE_OP(CUFILEOP_BASE_ERR + 12, CU_FILE_CUDA_POINTER_INVALID, invalid device pointer) \
90
+ CUFILE_OP(CUFILEOP_BASE_ERR + 13, CU_FILE_CUDA_MEMORY_TYPE_INVALID, invalid pointer memory type) \
91
+ CUFILE_OP(CUFILEOP_BASE_ERR + 14, CU_FILE_CUDA_POINTER_RANGE_ERROR, pointer range exceeds allocated address range) \
92
+ CUFILE_OP(CUFILEOP_BASE_ERR + 15, CU_FILE_CUDA_CONTEXT_MISMATCH, cuda context mismatch) \
93
+ CUFILE_OP(CUFILEOP_BASE_ERR + 16, CU_FILE_INVALID_MAPPING_SIZE, access beyond maximum pinned size) \
94
+ CUFILE_OP(CUFILEOP_BASE_ERR + 17, CU_FILE_INVALID_MAPPING_RANGE, access beyond mapped size) \
95
+ CUFILE_OP(CUFILEOP_BASE_ERR + 18, CU_FILE_INVALID_FILE_TYPE, unsupported file type) \
96
+ CUFILE_OP(CUFILEOP_BASE_ERR + 19, CU_FILE_INVALID_FILE_OPEN_FLAG, unsupported file open flags) \
97
+ CUFILE_OP(CUFILEOP_BASE_ERR + 20, CU_FILE_DIO_NOT_SET, fd direct IO not set) \
98
+ CUFILE_OP(CUFILEOP_BASE_ERR + 22, CU_FILE_INVALID_VALUE, invalid arguments) \
99
+ CUFILE_OP(CUFILEOP_BASE_ERR + 23, CU_FILE_MEMORY_ALREADY_REGISTERED, device pointer already registered) \
100
+ CUFILE_OP(CUFILEOP_BASE_ERR + 24, CU_FILE_MEMORY_NOT_REGISTERED, device pointer lookup failure) \
101
+ CUFILE_OP(CUFILEOP_BASE_ERR + 25, CU_FILE_PERMISSION_DENIED, driver or file access error) \
102
+ CUFILE_OP(CUFILEOP_BASE_ERR + 26, CU_FILE_DRIVER_ALREADY_OPEN, driver is already open) \
103
+ CUFILE_OP(CUFILEOP_BASE_ERR + 27, CU_FILE_HANDLE_NOT_REGISTERED, file descriptor is not registered) \
104
+ CUFILE_OP(CUFILEOP_BASE_ERR + 28, CU_FILE_HANDLE_ALREADY_REGISTERED, file descriptor is already registered) \
105
+ CUFILE_OP(CUFILEOP_BASE_ERR + 29, CU_FILE_DEVICE_NOT_FOUND, GPU device not found) \
106
+ CUFILE_OP(CUFILEOP_BASE_ERR + 30, CU_FILE_INTERNAL_ERROR, internal error) \
107
+ CUFILE_OP(CUFILEOP_BASE_ERR + 31, CU_FILE_GETNEWFD_FAILED, failed to obtain new file descriptor) \
108
+ CUFILE_OP(CUFILEOP_BASE_ERR + 33, CU_FILE_NVFS_SETUP_ERROR, NVFS driver initialization error) \
109
+ CUFILE_OP(CUFILEOP_BASE_ERR + 34, CU_FILE_IO_DISABLED, GPUDirect Storage disabled by config on current file)\
110
+ CUFILE_OP(CUFILEOP_BASE_ERR + 35, CU_FILE_BATCH_SUBMIT_FAILED, failed to submit batch operation)\
111
+ CUFILE_OP(CUFILEOP_BASE_ERR + 36, CU_FILE_GPU_MEMORY_PINNING_FAILED, failed to allocate pinned GPU Memory) \
112
+ CUFILE_OP(CUFILEOP_BASE_ERR + 37, CU_FILE_BATCH_FULL, queue full for batch operation) \
113
+ CUFILE_OP(CUFILEOP_BASE_ERR + 38, CU_FILE_ASYNC_NOT_SUPPORTED, cuFile stream operation not supported) \
114
+ CUFILE_OP(CUFILEOP_BASE_ERR + 39, CU_FILE_IO_MAX_ERROR, GPUDirect Storage Max Error)
115
+
116
+
117
+ /**
118
+ * @brief cufileop status enum
119
+ *
120
+ * @note on success the error code is set to @ref CU_FILE_SUCCESS.
121
+ * @note The error code can be inspected using @ref IS_CUFILE_ERR and @ref CUFILE_ERRSTR.
122
+ * @note If the error code is set to @ref CU_FILE_CUDA_DRIVER_ERROR, the CUDA error can be inspected using @ref IS_CUDA_ERR and @ref CU_FILE_CUDA_ERR.
123
+ * @note Data path errors are captured via standard error codes
124
+ */
125
+ typedef enum CUfileOpError {
126
+ /// @cond DOXYGEN_SKIP_MACRO
127
+ #define CUFILE_OP(code, name, string) name = code,
128
+ CUFILEOP_STATUS_ENTRIES
129
+ #undef CUFILE_OP
130
+ ///@endcond
131
+ } CUfileOpError;
132
+
133
+ /// @endcond
134
+
135
+ /**
136
+ * @brief cufileop status string
137
+ */
138
+ static inline const char *cufileop_status_error(CUfileOpError status)
139
+ {
140
+ switch (status) {
141
+ /// @cond DOXYGEN_SKIP_MACRO
142
+ #define CUFILE_OP(code, name, string) \
143
+ case name: return #string;
144
+ CUFILEOP_STATUS_ENTRIES
145
+ #undef CUFILE_OP
146
+ ///@endcond
147
+ default:return "unknown cufile error";
148
+ }
149
+ }
150
+
151
+ /**
152
+ * @brief cuFile error status: a cuFile error code paired with a CUDA driver error code
153
+ */
154
+ typedef struct CUfileError {
155
+
156
+ CUfileOpError err; // cufile error
157
+
158
+ CUresult cu_err; // cuda driver error
159
+
160
+ }CUfileError_t;
161
+
162
+ /**
163
+ * @brief error macros to inspect error status of type @ref CUfileOpError
164
+ */
165
+
166
+ #define IS_CUFILE_ERR(err) \
167
+ (abs((err)) > CUFILEOP_BASE_ERR)
168
+
169
+ #define CUFILE_ERRSTR(err) \
170
+ cufileop_status_error((CUfileOpError)abs((err)))
171
+
172
+ #define IS_CUDA_ERR(status) \
173
+ ((status).err == CU_FILE_CUDA_DRIVER_ERROR)
174
+
175
+ #define CU_FILE_CUDA_ERR(status) ((status).cu_err)
176
+
177
+ /* driver properties */
178
+ typedef enum CUfileDriverStatusFlags {
179
+ CU_FILE_LUSTRE_SUPPORTED = 0, /*!< Support for DDN LUSTRE */
180
+
181
+ CU_FILE_WEKAFS_SUPPORTED = 1, /*!< Support for WEKAFS */
182
+
183
+ CU_FILE_NFS_SUPPORTED = 2, /*!< Support for NFS */
184
+
185
+ CU_FILE_GPFS_SUPPORTED = 3, /*!< Support for GPFS */
186
+
187
+ CU_FILE_NVME_SUPPORTED = 4, /*!< Support for NVMe */
188
+
189
+ CU_FILE_NVMEOF_SUPPORTED = 5, /*!< Support for NVMeOF */
190
+
191
+ CU_FILE_SCSI_SUPPORTED = 6, /*!< Support for SCSI */
192
+
193
+ CU_FILE_SCALEFLUX_CSD_SUPPORTED = 7, /*!< Support for Scaleflux CSD*/
194
+
195
+ CU_FILE_NVMESH_SUPPORTED = 8, /*!< Support for NVMesh Block Dev*/
196
+ CU_FILE_BEEGFS_SUPPORTED = 9, /*!< Support for BeeGFS */
197
+ //10 is reserved for YRCloudFile
198
+ CU_FILE_NVME_P2P_SUPPORTED = 11, /*!< Support for NVMe using PCI P2PDMA */
199
+
200
+ }CUfileDriverStatusFlags_t;
201
+
202
+ typedef enum CUfileDriverControlFlags {
203
+ CU_FILE_USE_POLL_MODE = 0 , /*!< use POLL mode. properties.use_poll_mode*/
204
+
205
+ CU_FILE_ALLOW_COMPAT_MODE = 1/*!< allow COMPATIBILITY mode. properties.allow_compat_mode*/
206
+
207
+ }CUfileDriverControlFlags_t;
208
+
209
+ typedef enum CUfileFeatureFlags {
210
+ CU_FILE_DYN_ROUTING_SUPPORTED = 0, /*!< Support for Dynamic routing to handle devices across the PCIe bridges */
211
+
212
+ CU_FILE_BATCH_IO_SUPPORTED = 1, /*!< Unsupported */
213
+
214
+ CU_FILE_STREAMS_SUPPORTED = 2, /*!< Unsupported */
215
+
216
+ CU_FILE_PARALLEL_IO_SUPPORTED = 3 /*!< Unsupported */
217
+ }CUfileFeatureFlags_t;
218
+
219
+ typedef struct CUfileDrvProps {
220
+ struct {
221
+ unsigned int major_version;
222
+
223
+ unsigned int minor_version;
224
+
225
+ size_t poll_thresh_size;
226
+
227
+ size_t max_direct_io_size;
228
+
229
+ unsigned int dstatusflags;
230
+
231
+ unsigned int dcontrolflags;
232
+
233
+ } nvfs;
234
+
235
+ unsigned int fflags;
236
+
237
+ unsigned int max_device_cache_size;
238
+
239
+ unsigned int per_buffer_cache_size;
240
+
241
+ unsigned int max_device_pinned_mem_size;
242
+
243
+ unsigned int max_batch_io_size;
244
+ unsigned int max_batch_io_timeout_msecs;
245
+ }CUfileDrvProps_t;
246
+
247
+ typedef struct sockaddr sockaddr_t;
248
+
249
+ typedef struct cufileRDMAInfo
250
+ {
251
+ int version;
252
+ int desc_len;
253
+ const char *desc_str;
254
+ }cufileRDMAInfo_t;
255
+
256
+ #define CU_FILE_RDMA_REGISTER 1
257
+ #define CU_FILE_RDMA_RELAXED_ORDERING (1<<1)
258
+
259
+
260
+
261
+ typedef struct CUfileFSOps {
262
+ /* NULL means discover using fstat */
263
+ const char* (*fs_type) (void *handle);
264
+
265
+ /* list of host addresses to use, NULL means no restriction */
266
+ int (*getRDMADeviceList)(void *handle, sockaddr_t **hostaddrs);
267
+
268
+ /* -1 no pref */
269
+ int (*getRDMADevicePriority)(void *handle, char*, size_t,
270
+ loff_t, sockaddr_t* hostaddr);
271
+
272
+ /* NULL means try VFS */
273
+ ssize_t (*read) (void *handle, char*, size_t, loff_t, cufileRDMAInfo_t*);
274
+ ssize_t (*write) (void *handle, const char *, size_t, loff_t , cufileRDMAInfo_t*);
275
+ }CUfileFSOps_t;
276
+
277
+ /* File Handle */
278
+ enum CUfileFileHandleType {
279
+ CU_FILE_HANDLE_TYPE_OPAQUE_FD = 1, /*!< Linux based fd */
280
+
281
+ CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 = 2, /*!< Windows based handle (unsupported) */
282
+
283
+ CU_FILE_HANDLE_TYPE_USERSPACE_FS = 3, /* Userspace based FS */
284
+ };
285
+
286
+ typedef struct CUfileDescr_t {
287
+ enum CUfileFileHandleType type; /* type of file being registered */
288
+ union {
289
+ int fd; /* Linux */
290
+ void *handle; /* Windows */
291
+ } handle;
292
+ const CUfileFSOps_t *fs_ops; /* file system operation table */
293
+ }CUfileDescr_t;
294
+
295
+ /**
296
+ * @brief File handle type
297
+ *
298
+ */
299
+ typedef void* CUfileHandle_t;
300
+
301
+
302
+ #pragma GCC visibility push(default)
303
+
304
+ /**
305
+ * @brief cuFileHandleRegister is required, and performs extra checking that is memoized to provide increased performance on later cuFile operations.
306
+ *
307
+ * @param fh @ref CUfileHandle_t opaque file handle for IO operations
308
+ * @param descr @ref CUfileDescr_t file descriptor (OS agnostic)
309
+ *
310
+ * @return CU_FILE_SUCCESS on successful completion. fh will be updated for use in @ref cuFileRead, @ref cuFileWrite, @ref cuFileHandleDeregister
311
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED on failure to load driver
312
+ * @return CU_FILE_IO_NOT_SUPPORTED - if filesystem is not supported
313
+ * @return CU_FILE_INVALID_VALUE if null or bad api arguments
314
+ * @return CU_FILE_INVALID_FILE_OPEN_FLAG if file is opened with unsupported modes like no O_DIRECT
315
+ * @return CU_FILE_INVALID_FILE_TYPE if filepath is not valid or is not a regular file
316
+ * @return CU_FILE_HANDLE_ALREADY_REGISTERED if file handle/descriptor is already registered
317
+ *
318
+ * <b>Description</b>
319
+ * cuFileHandleRegister registers the open file descriptor for use with cuFile IO operations.
320
+ *
321
+ * This API will ensure that the file’s descriptor is checked for GPUDirect Storage support and returns a valid file handle on CU_FILE_SUCCESS.
322
+ *
323
+ * @note the file needs to be opened in O_DIRECT mode to support GPUDirect Storage.
324
+ *
325
+ * @see cuFileRead
326
+ * @see cuFileWrite
327
+ * @see cuFileHandleDeregister
328
+ *
329
+ */
330
+ CUfileError_t cuFileHandleRegister(CUfileHandle_t *fh, CUfileDescr_t *descr);
331
+
332
+ /**
333
+ * @brief releases a registered filehandle from cuFile
334
+ *
335
+ * @param fh @ref CUfileHandle_t file handle
336
+ *
337
+ * @return void
338
+ *
339
+ * @see cuFileHandleRegister
340
+ */
341
+ void cuFileHandleDeregister(CUfileHandle_t fh);
342
+
343
+ /**
344
+ * @brief register existing cudaMalloc'ed memory with cuFile to pin for GPUDirect Storage access or
345
+ * register host allocated memory with cuFile.
346
+ *
347
+ * @param bufPtr_base buffer pointer allocated
348
+ * @param length size of the memory region starting at the above specified bufPtr_base
349
+ * @param flags CU_FILE_RDMA_REGISTER
350
+ *
351
+ * @return CU_FILE_SUCCESS on success
352
+ * @return CU_FILE_NVFS_DRIVER_ERROR
353
+ * @return CU_FILE_INVALID_VALUE
354
+ * @return CU_FILE_CUDA_ERROR for unsupported memory type
355
+ * @return CU_FILE_MEMORY_ALREADY_REGISTERED if the device pointer is already registered
356
+ * @return CU_FILE_GPU_MEMORY_PINNING_FAILED if not enough pinned memory is available
357
+ * @note This memory will be used to perform GPU direct DMA from the supported storage.
358
+ * @warning This API is intended for usecases where the memory is used as streaming buffer that is reused across multiple cuFile IO operations before calling @ref cuFileBufDeregister
359
+ *
360
+ * @see cuFileBufDeregister
361
+ * @see cuFileRead
362
+ * @see cuFileWrite
363
+ */
364
+ CUfileError_t cuFileBufRegister(const void *bufPtr_base, size_t length, int flags);
365
+
366
+ /**
367
+ * @brief deregister an already registered device or host memory from cuFile
368
+ *
369
+ * @param bufPtr_base buffer pointer to deregister
370
+ *
371
+ * @return CU_FILE_SUCCESS on success
372
+ * @return CU_FILE_INVALID_VALUE on invalid memory pointer or unregistered memory pointer
373
+ *
374
+ * @see cuFileBufRegister
375
+ * @see cuFileRead
376
+ * @see cuFileWrite
377
+ */
378
+
379
+ CUfileError_t cuFileBufDeregister(const void *bufPtr_base);
380
+
381
+ /**
382
+ * @brief read data from a registered file handle to a specified device or host memory
383
+ *
384
+ * @param fh @ref CUfileHandle_t opaque file handle
385
+ * @param bufPtr_base base address of buffer in device or host memory
386
+ * @param size size bytes to read
387
+ * @param file_offset file-offset from beginning of the file
388
+ * @param bufPtr_offset offset relative to the bufPtr_base pointer to read into.
389
+ *
390
+ * @return size of bytes successfully read
391
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
392
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
393
+ *
394
+ * @note If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
395
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
396
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
397
+ *
398
+ * @see cuFileBufRegister
399
+ * @see cuFileHandleRegister
400
+ * @see cuFileWrite
401
+ */
402
+
403
+ ssize_t cuFileRead(CUfileHandle_t fh, void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
404
+
405
+ /**
406
+ * @brief write data from a specified device or host memory to a registered file handle
407
+ *
408
+ * @param fh @ref CUfileHandle_t opaque file handle
409
+ * @param bufPtr_base base address of buffer in device or host memory
410
+ * @param size size bytes to write
411
+ * @param file_offset file-offset from beginning of the file
412
+ * @param bufPtr_offset offset relative to the bufPtr_base pointer to write from.
413
+ *
414
+ * @return size of bytes successfully written
415
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
416
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
417
+ *
418
+ * @note If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
419
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
420
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
421
+ *
422
+ * @see cuFileBufRegister
423
+ * @see cuFileHandleRegister
424
+ * @see cuFileRead
425
+ */
426
+
427
+ ssize_t cuFileWrite(CUfileHandle_t fh, const void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
428
+
429
+ // CUFile Driver APIs
430
+
431
+ /**
432
+ * @brief
433
+ * Initialize the cuFile library and open the nvidia-fs driver
434
+ *
435
+ * @return CU_FILE_SUCCESS on success
436
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED
437
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH on driver version mismatch error
438
+ *
439
+ * @see cuFileDriverClose
440
+ */
441
+ CUfileError_t cuFileDriverOpen(void);
442
+
443
+ CUfileError_t cuFileDriverClose(void);
444
+ #define cuFileDriverClose cuFileDriverClose_v2
445
+ /**
446
+ * @brief
447
+ * reset the cuFile library and release the nvidia-fs driver
448
+ *
449
+ * @return CU_FILE_SUCCESS on success
450
+ * @return CU_FILE_DRIVER_CLOSING if there are any active IO operations using @ref cuFileRead or @ref cuFileWrite
451
+ *
452
+ * @see cuFileDriverOpen
453
+ */
454
+ CUfileError_t cuFileDriverClose(void);
455
+
456
+ /**
457
+ * @brief
458
+ * returns use count of cufile drivers at that moment by the process.
459
+ */
460
+ long cuFileUseCount(void);
461
+
462
+ /**
463
+ * @brief
464
+ * Gets the Driver session properties
465
+ *
466
+ * @return CU_FILE_SUCCESS on success
467
+ *
468
+ * @see cuFileDriverSetPollMode
469
+ * @see cuFileDriverSetMaxDirectIOSize
470
+ * @see cuFileDriverSetMaxCacheSize
471
+ * @see cuFileDriverSetMaxPinnedMemSize
472
+ */
473
+ CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t *props);
474
+
475
+ /**
476
+ * @brief
477
+ * Sets whether the Read/Write APIs use polling to do IO operations
478
+ *
479
+ * @param poll boolean to indicate whether to use poll mode or not
480
+ * @param poll_threshold_size max IO size to use for POLLING mode in KB
481
+ *
482
+ * @return CU_FILE_SUCCESS on success
483
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
484
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
485
+ *
486
+ * @warning This is an advanced command and should be tuned based on available system memory
487
+ *
488
+ * @see cuFileDriverGetProperties
489
+ */
490
+ CUfileError_t cuFileDriverSetPollMode(bool poll, size_t poll_threshold_size);
491
+
492
+ /**
493
+ * @brief
494
+ * Control parameter to set max IO size(KB) used by the library to talk to nvidia-fs driver
495
+ *
496
+ * @param max_direct_io_size maximum allowed direct io size in KB
497
+ *
498
+ * @return CU_FILE_SUCCESS on success
499
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
500
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
501
+ *
502
+ * @warning This is an advanced command and should be tuned based on available system memory
503
+ *
504
+ * @see cuFileDriverGetProperties
505
+ *
506
+ */
507
+ CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size);
508
+
509
+ /**
510
+ * @brief
511
+ * Control parameter to set maximum GPU memory reserved per device by the library for internal buffering
512
+ *
513
+ * @param max_cache_size The maximum GPU buffer space per device used for internal use in KB
514
+ *
515
+ * @return CU_FILE_SUCCESS on success
516
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
517
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
518
+ *
519
+ * @warning This is an advanced command and should be tuned based on supported GPU memory
520
+ *
521
+ * @see cuFileDriverGetProperties
522
+ */
523
+ CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size);
524
+
525
+ /**
526
+ * @brief
527
+ * Sets maximum buffer space that is pinned in KB for use by @ref cuFileBufRegister
528
+ *
529
+ * @param max_pinned_size maximum buffer space that is pinned in KB
530
+ *
531
+ * @return CU_FILE_SUCCESS on success
532
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
533
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
534
+ *
535
+ * @warning This is an advanced command and should be tuned based on supported GPU memory
536
+ *
537
+ * @see cuFileDriverGetProperties
538
+ *
539
+ */
540
+ CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size);
541
+
542
+ //Experimental Batch API's
543
+
544
+
545
+ typedef enum CUfileOpcode {
546
+ CUFILE_READ = 0,
547
+ CUFILE_WRITE
548
+ }CUfileOpcode_t;
549
+
550
+ typedef enum CUFILEStatus_enum {
551
+ CUFILE_WAITING = 0x000001, /* required value prior to submission */
552
+ CUFILE_PENDING = 0x000002, /* once enqueued */
553
+ CUFILE_INVALID = 0x000004, /* request was ill-formed or could not be enqueued */
554
+ CUFILE_CANCELED = 0x000008, /* request successfully canceled */
555
+ CUFILE_COMPLETE = 0x0000010, /* request successfully completed */
556
+ CUFILE_TIMEOUT = 0x0000020, /* request timed out */
557
+ CUFILE_FAILED = 0x0000040 /* unable to complete */
558
+ }CUfileStatus_t;
559
+ typedef enum cufileBatchMode {
560
+ CUFILE_BATCH = 1,
561
+ } CUfileBatchMode_t;
562
+ typedef struct CUfileIOParams {
563
+ CUfileBatchMode_t mode; // Must be the very first field.
564
+ union {
565
+ struct {
566
+ void *devPtr_base; //This can be a device memory or a host memory pointer.
567
+ off_t file_offset;
568
+ off_t devPtr_offset;
569
+ size_t size;
570
+ }batch;
571
+ }u;
572
+ CUfileHandle_t fh;
573
+ CUfileOpcode_t opcode;
574
+ void *cookie;
575
+ }CUfileIOParams_t;
576
+ typedef struct CUfileIOEvents {
577
+ void *cookie;
578
+ CUfileStatus_t status; /* status of the operation */
579
+ size_t ret; /* -ve error or amount of I/O done. */
580
+ }CUfileIOEvents_t;
581
+
582
+ typedef void* CUfileBatchHandle_t;
583
+
584
+ CUfileError_t cuFileBatchIOSetUp(CUfileBatchHandle_t *batch_idp, unsigned nr);
585
+ CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t *iocbp, unsigned int flags);
586
+ CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr,
587
+ CUfileIOEvents_t *iocbp, struct timespec* timeout);
588
+ CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp);
589
+ void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp);
590
+
591
+ //Async API's with cuda streams
592
+
593
+ // cuFile stream API registration flags
594
+ // buffer pointer offset is set at submission time
595
+ #define CU_FILE_STREAM_FIXED_BUF_OFFSET 1
596
+ // file offset is set at submission time
597
+ #define CU_FILE_STREAM_FIXED_FILE_OFFSET 2
598
+ // file size is set at submission time
599
+ #define CU_FILE_STREAM_FIXED_FILE_SIZE 4
600
+ // size, offset and buffer offset are 4k aligned
601
+ #define CU_FILE_STREAM_PAGE_ALIGNED_INPUTS 8
602
+
603
+ /**
604
+ *@brief
605
+
606
+ * @param fh The cuFile handle for the file.
607
+ * @param bufPtr_base base address of buffer in device or host memory
608
+ * @param size_p pointer to size bytes to read
609
+ * @note *size_p if the size is not known at the time of submission, then must provide the max possible size for I/O request.
610
+ * @param file_offset_p pointer to file-offset from beginning of the file
611
+ * @param bufPtr_offset_p pointer to offset relative to the bufPtr_base pointer to read into.
612
+ * @param bytes_read_p pointer to the number of bytes that were successfully read.
613
+ * @param stream CUDA stream for the operation.
614
+ *
615
+ * @return size of bytes successfully read in *bytes_read_p
616
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
617
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
618
+ *
619
+ * @note If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
620
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
621
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
622
+ * @note If the stream is registered with cuFileStreamRegister, the IO setup and teardown overhead will be reduced.
623
+ * @note on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
624
+ *
625
+ *
626
+ * @see cuFileBufRegister
627
+ * @see cuFileHandleRegister
628
+ * @see cuFileRead
629
+ * @see cuFileStreamRegister
630
+ * @see cuFileStreamDeregister
631
+ */
632
+
633
+ CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void *bufPtr_base,
634
+ size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_read_p, CUstream stream);
635
+
636
+ /**
637
+ *@brief
638
+
639
+ * @param fh The cuFile handle for the file.
640
+ * @param bufPtr_base base address of buffer in device or host memory
641
+ * @param size_p pointer to size bytes to write.
642
+ * @note *size_p if the size is not known at the time of submission, then must provide the max possible size for I/O request.
643
+ * @param file_offset_p pointer to file-offset from beginning of the file
644
+ * @param bufPtr_offset_p pointer to offset relative to the bufPtr_base pointer to write from.
645
+ * @param bytes_written_p pointer to the number of bytes that were successfully written.
646
+ * @param stream CUDA stream for the operation.
647
+ *
648
+ * @return size of bytes successfully written in *bytes_written_p
649
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
650
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
651
+ *
652
+ * @note If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
653
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
654
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
655
+ * @note If the stream is registered with cuFileStreamRegister prior to this call, the IO setup and teardown overhead will be reduced.
656
+ * @note on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
657
+ *
658
+ * @see cuFileBufRegister
659
+ * @see cuFileHandleRegister
660
+ * @see cuFileWrite
661
+ * @see cuFileStreamRegister
662
+ * @see cuFileStreamDeregister
663
+ */
664
+
665
+ CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void *bufPtr_base,
666
+ size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_written_p, CUstream stream);
667
+
668
+ /**
669
+ *@brief
670
+
671
+ * @param stream CUDA stream for the operation.
672
+ * @param flags for the stream to improve the stream execution of IO based on input parameters.
673
+ * @note supported FLAGS are
674
+ * @note CU_FILE_STREAM_FIXED_BUF_OFFSET - buffer pointer offset is set at submission time
675
+ * @note CU_FILE_STREAM_FIXED_FILE_OFFSET - file offset is set at submission time
676
+ * @note CU_FILE_STREAM_FIXED_FILE_SIZE - file size is set at submission time
677
+ * @note CU_FILE_STREAM_PAGE_ALIGNED_INPUTS - size, offset and buffer offset are 4k aligned
678
+ *
679
+ * @note allocates resources needed to support cuFile operations asynchronously for the cuda stream
680
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
681
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
682
+ *
683
+ * @return CU_FILE_SUCCESS on success
684
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
685
+ * @return CU_FILE_INVALID_VALUE if the stream is invalid
686
+ *
687
+ * @see cuFileReadAsync
688
+ * @see cuFileWriteAsync
689
+ * @see cuFileStreamDeregister
690
+ */
691
+
692
+ CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags);
693
+
694
+ /**
695
+ *@brief
696
+
697
+ * @param stream CUDA stream for the operation.
698
+ *
699
+ * @note deallocates resources used by previous cuFile asynchronous operations for the cuda stream
700
+ * @note it is highly recommended to call this after cuda stream errors to release any outstanding cuFile resources for this stream
701
+ * @note must be called before cuStreamDestroy call for the specified stream.
702
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
703
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
704
+ *
705
+ * @return CU_FILE_SUCCESS on success
706
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
707
+ * @return CU_FILE_INVALID_VALUE if the stream is invalid
708
+ *
709
+ * @see cuFileReadAsync
710
+ * @see cuFileWriteAsync
711
+ * @see cuFileStreamRegister
712
+ */
713
+
714
+ CUfileError_t cuFileStreamDeregister(CUstream stream);
715
+
716
+ /**
717
+ *@brief
718
+
719
+ * @returns cufile library version.
720
+ *
721
+ * @note The version is returned as (1000 * major + 10 * minor).
722
+ * @note For example, CUFILE 1.7.0 would be represented by 1070.
723
+ * @note This is useful for applications that need to inquire the library.
724
+ *
725
+ * @return CU_FILE_SUCCESS on success
726
+ * @return CU_FILE_INVALID_VALUE if the input parameter is null.
727
+ * @return CU_FILE_DRIVER_VERSION_READ_ERROR if the version is not available.
728
+ *
729
+ */
730
+
731
+ CUfileError_t cuFileGetVersion(int *version);
732
+
733
+ #pragma GCC visibility pop
734
+
735
+ /// @cond DOXYGEN_SKIP_MACRO
736
+ #endif // CUFILE_H
737
+ /// @endcond
738
+ #ifdef __cplusplus
739
+ }
740
+ #endif
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/lib/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/lib/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (223 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/cufile/lib/libcufile_rdma.so.1 ADDED
Binary file (46.5 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia/curand/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (219 Bytes). View file