BryanW committed on
Commit
2ce59c4
·
verified ·
1 Parent(s): 5e74fae

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_lognormal.h +698 -0
  2. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_mtgp32_host.h +516 -0
  3. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExt.h +1668 -0
  4. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCounters.h +311 -0
  5. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCuda.h +164 -0
  6. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCudaRt.h +139 -0
  7. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMem.h +749 -0
  8. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMemCudaRt.h +217 -0
  9. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtOpenCL.h +213 -0
  10. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayload.h +1478 -0
  11. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayloadHelper.h +192 -0
  12. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsCounters.h +132 -0
  13. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsScope.h +50 -0
  14. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsTime.h +49 -0
  15. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSync.h +406 -0
  16. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtx3.hpp +0 -0
  17. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtHelperMacros.h +64 -0
  18. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImpl.h +123 -0
  19. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplCounters_v1.h +166 -0
  20. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMemCudaRt_v1.h +72 -0
  21. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMem_v1.h +168 -0
  22. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplPayload_v1.h +265 -0
  23. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtInit.h +437 -0
  24. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadHelperInternal.h +294 -0
  25. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadTypeInfo.h +189 -0
  26. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtTypes.h +66 -0
  27. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImpl.h +464 -0
  28. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCore.h +432 -0
  29. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h +128 -0
  30. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h +156 -0
  31. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h +239 -0
  32. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h +124 -0
  33. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInit.h +468 -0
  34. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDecls.h +103 -0
  35. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDefs.h +595 -0
  36. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxLinkOnce.h +88 -0
  37. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxTypes.h +318 -0
  38. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libcufile_rdma.so.1 +0 -0
  39. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libnvtx3interop.so.1 +0 -0
  40. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/__pycache__/__init__.cpython-312.pyc +0 -0
  41. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__init__.py +0 -0
  42. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__pycache__/__init__.cpython-312.pyc +0 -0
  43. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h +740 -0
  44. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__init__.py +0 -0
  45. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__pycache__/__init__.cpython-312.pyc +0 -0
  46. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/libcufile_rdma.so.1 +0 -0
  47. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/__pycache__/__init__.cpython-312.pyc +0 -0
  48. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__init__.py +0 -0
  49. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__pycache__/__init__.cpython-312.pyc +0 -0
  50. URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/curand.h +1080 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_lognormal.h ADDED
@@ -0,0 +1,698 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /* Copyright 2010-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * The source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * The Licensed Deliverables contained herein are PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and are being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+
51
+ #if !defined(CURAND_LOGNORMAL_H_)
52
+ #define CURAND_LOGNORMAL_H_
53
+
54
+ /**
55
+ * \defgroup DEVICE Device API
56
+ *
57
+ * @{
58
+ */
59
+
60
+ #ifndef __CUDACC_RTC__
61
+ #include <math.h>
62
+ #endif // __CUDACC_RTC__
63
+
64
+ #include "curand_mrg32k3a.h"
65
+ #include "curand_mtgp32_kernel.h"
66
+ #include "curand_philox4x32_x.h"
67
+
68
+ /**
69
+ * \brief Return a log-normally distributed float from an XORWOW generator.
70
+ *
71
+ * Return a single log-normally distributed float derived from a normal
72
+ * distribution with mean \p mean and standard deviation \p stddev
73
+ * from the XORWOW generator in \p state,
74
+ * increment position of generator by one.
75
+ *
76
+ * The implementation uses a Box-Muller transform to generate two
77
+ * normally distributed results, transforms them to log-normal distribution,
78
+ * then returns them one at a time.
79
+ * See ::curand_log_normal2() for a more efficient version that returns
80
+ * both results at once.
81
+ *
82
+ * \param state - Pointer to state to update
83
+ * \param mean - Mean of the related normal distribution
84
+ * \param stddev - Standard deviation of the related normal distribution
85
+ *
86
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
87
+ */
88
+ QUALIFIERS float curand_log_normal(curandStateXORWOW_t *state, float mean, float stddev)
89
+ {
90
+ if(state->boxmuller_flag != EXTRA_FLAG_LOG_NORMAL) {
91
+ unsigned int x, y;
92
+ x = curand(state);
93
+ y = curand(state);
94
+ float2 v = _curand_box_muller(x, y);
95
+ state->boxmuller_extra = expf(mean + (stddev * v.y));
96
+ state->boxmuller_flag = EXTRA_FLAG_LOG_NORMAL;
97
+ return expf(mean + (stddev * v.x));
98
+ }
99
+ state->boxmuller_flag = 0;
100
+ return state->boxmuller_extra;
101
+ }
102
+
103
+ /**
104
+ * \brief Return a log-normally distributed float from a Philox4_32_10 generator.
105
+ *
106
+ * Return a single log-normally distributed float derived from a normal
107
+ * distribution with mean \p mean and standard deviation \p stddev
108
+ * from the Philox4_32_10 generator in \p state,
109
+ * increment position of generator by one.
110
+ *
111
+ * The implementation uses a Box-Muller transform to generate two
112
+ * normally distributed results, transforms them to log-normal distribution,
113
+ * then returns them one at a time.
114
+ * See ::curand_log_normal2() for a more efficient version that returns
115
+ * both results at once.
116
+ *
117
+ * \param state - Pointer to state to update
118
+ * \param mean - Mean of the related normal distribution
119
+ * \param stddev - Standard deviation of the related normal distribution
120
+ *
121
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
122
+ */
123
+
124
+ QUALIFIERS float curand_log_normal(curandStatePhilox4_32_10_t *state, float mean, float stddev)
125
+ {
126
+ if(state->boxmuller_flag != EXTRA_FLAG_LOG_NORMAL) {
127
+ unsigned int x, y;
128
+ x = curand(state);
129
+ y = curand(state);
130
+ float2 v = _curand_box_muller(x, y);
131
+ state->boxmuller_extra = expf(mean + (stddev * v.y));
132
+ state->boxmuller_flag = EXTRA_FLAG_LOG_NORMAL;
133
+ return expf(mean + (stddev * v.x));
134
+ }
135
+ state->boxmuller_flag = 0;
136
+ return state->boxmuller_extra;
137
+ }
138
+
139
+ /**
140
+ * \brief Return two log-normally distributed floats from an XORWOW generator.
141
+ *
142
+ * Return two log-normally distributed floats derived from a normal
143
+ * distribution with mean \p mean and standard deviation \p stddev
144
+ * from the XORWOW generator in \p state,
145
+ * increment position of generator by two.
146
+ *
147
+ * The implementation uses a Box-Muller transform to generate two
148
+ * normally distributed results, then transforms them to log-normal.
149
+ *
150
+ * \param state - Pointer to state to update
151
+ * \param mean - Mean of the related normal distribution
152
+ * \param stddev - Standard deviation of the related normal distribution
153
+ *
154
+ * \return Log-normally distributed float2 where each element is from a
155
+ * distribution with mean \p mean and standard deviation \p stddev
156
+ */
157
+ QUALIFIERS float2 curand_log_normal2(curandStateXORWOW_t *state, float mean, float stddev)
158
+ {
159
+ float2 v = curand_box_muller(state);
160
+ v.x = expf(mean + (stddev * v.x));
161
+ v.y = expf(mean + (stddev * v.y));
162
+ return v;
163
+ }
164
+
165
+ /**
166
+ * \brief Return two log-normally distributed floats from a Philox4_32_10 generator.
167
+ *
168
+ * Return two log-normally distributed floats derived from a normal
169
+ * distribution with mean \p mean and standard deviation \p stddev
170
+ * from the Philox4_32_10 generator in \p state,
171
+ * increment position of generator by two.
172
+ *
173
+ * The implementation uses a Box-Muller transform to generate two
174
+ * normally distributed results, then transforms them to log-normal.
175
+ *
176
+ * \param state - Pointer to state to update
177
+ * \param mean - Mean of the related normal distribution
178
+ * \param stddev - Standard deviation of the related normal distribution
179
+ *
180
+ * \return Log-normally distributed float2 where each element is from a
181
+ * distribution with mean \p mean and standard deviation \p stddev
182
+ */
183
+ QUALIFIERS float2 curand_log_normal2(curandStatePhilox4_32_10_t *state, float mean, float stddev)
184
+ {
185
+ float2 v = curand_box_muller(state);
186
+ v.x = expf(mean + (stddev * v.x));
187
+ v.y = expf(mean + (stddev * v.y));
188
+ return v;
189
+ }
190
+ /**
191
+ * \brief Return four log-normally distributed floats from a Philox4_32_10 generator.
192
+ *
193
+ * Return four log-normally distributed floats derived from a normal
194
+ * distribution with mean \p mean and standard deviation \p stddev
195
+ * from the Philox4_32_10 generator in \p state,
196
+ * increment position of generator by four.
197
+ *
198
+ * The implementation uses a Box-Muller transform to generate four
199
+ * normally distributed results, then transforms them to log-normal.
200
+ *
201
+ * \param state - Pointer to state to update
202
+ * \param mean - Mean of the related normal distribution
203
+ * \param stddev - Standard deviation of the related normal distribution
204
+ *
205
+ * \return Log-normally distributed float4 where each element is from a
206
+ * distribution with mean \p mean and standard deviation \p stddev
207
+ */
208
+ QUALIFIERS float4 curand_log_normal4(curandStatePhilox4_32_10_t *state, float mean, float stddev)
209
+ {
210
+ float4 v = curand_box_muller4(state);
211
+ v.x = expf(mean + (stddev * v.x));
212
+ v.y = expf(mean + (stddev * v.y));
213
+ v.z = expf(mean + (stddev * v.z));
214
+ v.w = expf(mean + (stddev * v.w));
215
+ return v;
216
+ }
217
+
218
+ /**
219
+ * \brief Return a log-normally distributed float from an MRG32k3a generator.
220
+ *
221
+ * Return a single log-normally distributed float derived from a normal
222
+ * distribution with mean \p mean and standard deviation \p stddev
223
+ * from the MRG32k3a generator in \p state,
224
+ * increment position of generator by one.
225
+ *
226
+ * The implementation uses a Box-Muller transform to generate two
227
+ * normally distributed results, transforms them to log-normal distribution,
228
+ * then returns them one at a time.
229
+ * See ::curand_log_normal2() for a more efficient version that returns
230
+ * both results at once.
231
+ *
232
+ * \param state - Pointer to state to update
233
+ * \param mean - Mean of the related normal distribution
234
+ * \param stddev - Standard deviation of the related normal distribution
235
+ *
236
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
237
+ */
238
+ QUALIFIERS float curand_log_normal(curandStateMRG32k3a_t *state, float mean, float stddev)
239
+ {
240
+ if(state->boxmuller_flag != EXTRA_FLAG_LOG_NORMAL) {
241
+ float2 v = curand_box_muller_mrg(state);
242
+ state->boxmuller_extra = expf(mean + (stddev * v.y));
243
+ state->boxmuller_flag = EXTRA_FLAG_LOG_NORMAL;
244
+ return expf(mean + (stddev * v.x));
245
+ }
246
+ state->boxmuller_flag = 0;
247
+ return state->boxmuller_extra;
248
+ }
249
+
250
+ /**
251
+ * \brief Return two log-normally distributed floats from an MRG32k3a generator.
252
+ *
253
+ * Return two log-normally distributed floats derived from a normal
254
+ * distribution with mean \p mean and standard deviation \p stddev
255
+ * from the MRG32k3a generator in \p state,
256
+ * increment position of generator by two.
257
+ *
258
+ * The implementation uses a Box-Muller transform to generate two
259
+ * normally distributed results, then transforms them to log-normal.
260
+ *
261
+ * \param state - Pointer to state to update
262
+ * \param mean - Mean of the related normal distribution
263
+ * \param stddev - Standard deviation of the related normal distribution
264
+ *
265
+ * \return Log-normally distributed float2 where each element is from a
266
+ * distribution with mean \p mean and standard deviation \p stddev
267
+ */
268
+ QUALIFIERS float2 curand_log_normal2(curandStateMRG32k3a_t *state, float mean, float stddev)
269
+ {
270
+ float2 v = curand_box_muller_mrg(state);
271
+ v.x = expf(mean + (stddev * v.x));
272
+ v.y = expf(mean + (stddev * v.y));
273
+ return v;
274
+ }
275
+
276
+ /**
277
+ * \brief Return a log-normally distributed float from an MTGP32 generator.
278
+ *
279
+ * Return a single log-normally distributed float derived from a normal
280
+ * distribution with mean \p mean and standard deviation \p stddev
281
+ * from the MTGP32 generator in \p state,
282
+ * increment position of generator.
283
+ *
284
+ * The implementation uses the inverse cumulative distribution function
285
+ * to generate a normally distributed result, then transforms the result
286
+ * to log-normal.
287
+ *
288
+ * \param state - Pointer to state to update
289
+ * \param mean - Mean of the related normal distribution
290
+ * \param stddev - Standard deviation of the related normal distribution
291
+ *
292
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
293
+ */
294
+ QUALIFIERS float curand_log_normal(curandStateMtgp32_t *state, float mean, float stddev)
295
+ {
296
+ return expf(mean + (stddev * _curand_normal_icdf(curand(state))));
297
+ }
298
+
299
+ /**
300
+ * \brief Return a log-normally distributed float from a Sobol32 generator.
301
+ *
302
+ * Return a single log-normally distributed float derived from a normal
303
+ * distribution with mean \p mean and standard deviation \p stddev
304
+ * from the Sobol32 generator in \p state,
305
+ * increment position of generator by one.
306
+ *
307
+ * The implementation uses the inverse cumulative distribution function
308
+ * to generate a normally distributed result, then transforms the result
309
+ * to log-normal.
310
+ *
311
+ * \param state - Pointer to state to update
312
+ * \param mean - Mean of the related normal distribution
313
+ * \param stddev - Standard deviation of the related normal distribution
314
+ *
315
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
316
+ */
317
+ QUALIFIERS float curand_log_normal(curandStateSobol32_t *state, float mean, float stddev)
318
+ {
319
+ return expf(mean + (stddev * _curand_normal_icdf(curand(state))));
320
+ }
321
+ /**
322
+ * \brief Return a log-normally distributed float from a scrambled Sobol32 generator.
323
+ *
324
+ * Return a single log-normally distributed float derived from a normal
325
+ * distribution with mean \p mean and standard deviation \p stddev
326
+ * from the scrambled Sobol32 generator in \p state,
327
+ * increment position of generator by one.
328
+ *
329
+ * The implementation uses the inverse cumulative distribution function
330
+ * to generate a normally distributed result, then transforms the result
331
+ * to log-normal.
332
+ *
333
+ * \param state - Pointer to state to update
334
+ * \param mean - Mean of the related normal distribution
335
+ * \param stddev - Standard deviation of the related normal distribution
336
+ *
337
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
338
+ */
339
+ QUALIFIERS float curand_log_normal(curandStateScrambledSobol32_t *state, float mean, float stddev)
340
+ {
341
+ return expf(mean + (stddev * _curand_normal_icdf(curand(state))));
342
+ }
343
+
344
+ /**
345
+ * \brief Return a log-normally distributed float from a Sobol64 generator.
346
+ *
347
+ * Return a single log-normally distributed float derived from a normal
348
+ * distribution with mean \p mean and standard deviation \p stddev
349
+ * from the Sobol64 generator in \p state,
350
+ * increment position of generator by one.
351
+ *
352
+ * The implementation uses the inverse cumulative distribution function
353
+ * to generate normally distributed results, then converts to log-normal
354
+ * distribution.
355
+ *
356
+ * \param state - Pointer to state to update
357
+ * \param mean - Mean of the related normal distribution
358
+ * \param stddev - Standard deviation of the related normal distribution
359
+ *
360
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
361
+ */
362
+ QUALIFIERS float curand_log_normal(curandStateSobol64_t *state, float mean, float stddev)
363
+ {
364
+ return expf(mean + (stddev * _curand_normal_icdf(curand(state))));
365
+ }
366
+
367
+ /**
368
+ * \brief Return a log-normally distributed float from a scrambled Sobol64 generator.
369
+ *
370
+ * Return a single log-normally distributed float derived from a normal
371
+ * distribution with mean \p mean and standard deviation \p stddev
372
+ * from the scrambled Sobol64 generator in \p state,
373
+ * increment position of generator by one.
374
+ *
375
+ * The implementation uses the inverse cumulative distribution function
376
+ * to generate normally distributed results, then converts to log-normal
377
+ * distribution.
378
+ *
379
+ * \param state - Pointer to state to update
380
+ * \param mean - Mean of the related normal distribution
381
+ * \param stddev - Standard deviation of the related normal distribution
382
+ *
383
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
384
+ */
385
+ QUALIFIERS float curand_log_normal(curandStateScrambledSobol64_t *state, float mean, float stddev)
386
+ {
387
+ return expf(mean + (stddev * _curand_normal_icdf(curand(state))));
388
+ }
389
+
390
+ /**
391
+ * \brief Return a log-normally distributed double from an XORWOW generator.
392
+ *
393
+ * Return a single log-normally distributed double derived from a normal
394
+ * distribution with mean \p mean and standard deviation \p stddev
395
+ * from the XORWOW generator in \p state,
396
+ * increment position of generator.
397
+ *
398
+ * The implementation uses a Box-Muller transform to generate two
399
+ * normally distributed results, transforms them to log-normal distribution,
400
+ * then returns them one at a time.
401
+ * See ::curand_log_normal2_double() for a more efficient version that returns
402
+ * both results at once.
403
+ *
404
+ * \param state - Pointer to state to update
405
+ * \param mean - Mean of the related normal distribution
406
+ * \param stddev - Standard deviation of the related normal distribution
407
+ *
408
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
409
+ */
410
+
411
+ QUALIFIERS double curand_log_normal_double(curandStateXORWOW_t *state, double mean, double stddev)
412
+ {
413
+ if(state->boxmuller_flag_double != EXTRA_FLAG_LOG_NORMAL) {
414
+ unsigned int x0, x1, y0, y1;
415
+ x0 = curand(state);
416
+ x1 = curand(state);
417
+ y0 = curand(state);
418
+ y1 = curand(state);
419
+ double2 v = _curand_box_muller_double(x0, x1, y0, y1);
420
+ state->boxmuller_extra_double = exp(mean + (stddev * v.y));
421
+ state->boxmuller_flag_double = EXTRA_FLAG_LOG_NORMAL;
422
+ return exp(mean + (stddev * v.x));
423
+ }
424
+ state->boxmuller_flag_double = 0;
425
+ return state->boxmuller_extra_double;
426
+ }
427
+
428
+ /**
429
+ * \brief Return a log-normally distributed double from a Philox4_32_10 generator.
430
+ *
431
+ * Return a single log-normally distributed double derived from a normal
432
+ * distribution with mean \p mean and standard deviation \p stddev
433
+ * from the Philox4_32_10 generator in \p state,
434
+ * increment position of generator.
435
+ *
436
+ * The implementation uses a Box-Muller transform to generate two
437
+ * normally distributed results, transforms them to log-normal distribution,
438
+ * then returns them one at a time.
439
+ * See ::curand_log_normal2_double() for a more efficient version that returns
440
+ * both results at once.
441
+ *
442
+ * \param state - Pointer to state to update
443
+ * \param mean - Mean of the related normal distribution
444
+ * \param stddev - Standard deviation of the related normal distribution
445
+ *
446
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
447
+ */
448
+
449
+ QUALIFIERS double curand_log_normal_double(curandStatePhilox4_32_10_t *state, double mean, double stddev)
450
+ {
451
+ if(state->boxmuller_flag_double != EXTRA_FLAG_LOG_NORMAL) {
452
+ uint4 _x;
453
+ _x = curand4(state);
454
+ double2 v = _curand_box_muller_double(_x.x, _x.y, _x.z, _x.w);
455
+ state->boxmuller_extra_double = exp(mean + (stddev * v.y));
456
+ state->boxmuller_flag_double = EXTRA_FLAG_LOG_NORMAL;
457
+ return exp(mean + (stddev * v.x));
458
+ }
459
+ state->boxmuller_flag_double = 0;
460
+ return state->boxmuller_extra_double;
461
+ }
462
+
463
+
464
+ /**
465
+ * \brief Return two log-normally distributed doubles from an XORWOW generator.
466
+ *
467
+ * Return two log-normally distributed doubles derived from a normal
468
+ * distribution with mean \p mean and standard deviation \p stddev
469
+ * from the XORWOW generator in \p state,
470
+ * increment position of generator by two.
471
+ *
472
+ * The implementation uses a Box-Muller transform to generate two
473
+ * normally distributed results, and transforms them to log-normal distribution.
474
+ *
475
+ * \param state - Pointer to state to update
476
+ * \param mean - Mean of the related normal distribution
477
+ * \param stddev - Standard deviation of the related normal distribution
478
+ *
479
+ * \return Log-normally distributed double2 where each element is from a
480
+ * distribution with mean \p mean and standard deviation \p stddev
481
+ */
482
+ QUALIFIERS double2 curand_log_normal2_double(curandStateXORWOW_t *state, double mean, double stddev)
483
+ {
484
+ double2 v = curand_box_muller_double(state);
485
+ v.x = exp(mean + (stddev * v.x));
486
+ v.y = exp(mean + (stddev * v.y));
487
+ return v;
488
+ }
489
+
490
+ /**
491
+ * \brief Return two log-normally distributed doubles from a Philox4_32_10 generator.
492
+ *
493
+ * Return two log-normally distributed doubles derived from a normal
494
+ * distribution with mean \p mean and standard deviation \p stddev
495
+ * from the Philox4_32_10 generator in \p state,
496
+ * increment position of generator by four.
497
+ *
498
+ * The implementation uses a Box-Muller transform to generate two
499
+ * normally distributed results, and transforms them to log-normal distribution.
500
+ *
501
+ * \param state - Pointer to state to update
502
+ * \param mean - Mean of the related normal distribution
503
+ * \param stddev - Standard deviation of the related normal distribution
504
+ *
505
+ * \return Log-normally distributed double2 where each element is from a
506
+ * distribution with mean \p mean and standard deviation \p stddev
507
+ */
508
+ QUALIFIERS double2 curand_log_normal2_double(curandStatePhilox4_32_10_t *state, double mean, double stddev)
509
+ {
510
+ double2 v = curand_box_muller2_double(state);
511
+ v.x = exp(mean + (stddev * v.x));
512
+ v.y = exp(mean + (stddev * v.y));
513
+ return v;
514
+ }
515
+ // not part of API
516
+ __NV_SILENCE_DEPRECATION_BEGIN
517
+ QUALIFIERS double4 curand_log_normal4_double(curandStatePhilox4_32_10_t *state, double mean, double stddev)
518
+ {
519
+ double4 v = curand_box_muller4_double(state);
520
+ v.x = exp(mean + (stddev * v.x));
521
+ v.y = exp(mean + (stddev * v.y));
522
+ v.z = exp(mean + (stddev * v.z));
523
+ v.w = exp(mean + (stddev * v.w));
524
+ return v;
525
+ }
526
+ __NV_SILENCE_DEPRECATION_END
527
+ /**
528
+ * \brief Return a log-normally distributed double from an MRG32k3a generator.
529
+ *
530
+ * Return a single log-normally distributed double derived from a normal
531
+ * distribution with mean \p mean and standard deviation \p stddev
532
+ * from the MRG32k3a generator in \p state,
533
+ * increment position of generator.
534
+ *
535
+ * The implementation uses a Box-Muller transform to generate two
536
+ * normally distributed results, transforms them to log-normal distribution,
537
+ * then returns them one at a time.
538
+ * See ::curand_log_normal2_double() for a more efficient version that returns
539
+ * both results at once.
540
+ *
541
+ * \param state - Pointer to state to update
542
+ * \param mean - Mean of the related normal distribution
543
+ * \param stddev - Standard deviation of the related normal distribution
544
+ *
545
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
546
+ */
547
+ QUALIFIERS double curand_log_normal_double(curandStateMRG32k3a_t *state, double mean, double stddev)
548
+ {
549
+ if(state->boxmuller_flag_double != EXTRA_FLAG_LOG_NORMAL) {
550
+ double2 v = curand_box_muller_mrg_double(state);
551
+ state->boxmuller_extra_double = exp(mean + (stddev * v.y));
552
+ state->boxmuller_flag_double = EXTRA_FLAG_LOG_NORMAL;
553
+ return exp(mean + (stddev * v.x));
554
+ }
555
+ state->boxmuller_flag_double = 0;
556
+ return state->boxmuller_extra_double;
557
+ }
558
+
559
+ /**
560
+ * \brief Return two log-normally distributed doubles from an MRG32k3a generator.
561
+ *
562
+ * Return two log-normally distributed doubles derived from a normal
563
+ * distribution with mean \p mean and standard deviation \p stddev
564
+ * from the MRG32k3a generator in \p state,
565
+ * increment position of generator by two.
566
+ *
567
+ * The implementation uses a Box-Muller transform to generate two
568
+ * normally distributed results, and transforms them to log-normal distribution,.
569
+ *
570
+ * \param state - Pointer to state to update
571
+ * \param mean - Mean of the related normal distribution
572
+ * \param stddev - Standard deviation of the related normal distribution
573
+ *
574
+ * \return Log-normally distributed double2 where each element is from a
575
+ * distribution with mean \p mean and standard deviation \p stddev
576
+ */
577
+ QUALIFIERS double2 curand_log_normal2_double(curandStateMRG32k3a_t *state, double mean, double stddev)
578
+ {
579
+ double2 v = curand_box_muller_mrg_double(state);
580
+ v.x = exp(mean + (stddev * v.x));
581
+ v.y = exp(mean + (stddev * v.y));
582
+ return v;
583
+ }
584
+
585
+ /**
586
+ * \brief Return a log-normally distributed double from an MTGP32 generator.
587
+ *
588
+ * Return a single log-normally distributed double derived from a normal
589
+ * distribution with mean \p mean and standard deviation \p stddev
590
+ * from the MTGP32 generator in \p state,
591
+ * increment position of generator.
592
+ *
593
+ * The implementation uses the inverse cumulative distribution function
594
+ * to generate normally distributed results, and transforms them into
595
+ * log-normal distribution.
596
+ *
597
+ * \param state - Pointer to state to update
598
+ * \param mean - Mean of the related normal distribution
599
+ * \param stddev - Standard deviation of the related normal distribution
600
+ *
601
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
602
+ */
603
+ QUALIFIERS double curand_log_normal_double(curandStateMtgp32_t *state, double mean, double stddev)
604
+ {
605
+ return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
606
+ }
607
+
608
+ /**
609
+ * \brief Return a log-normally distributed double from a Sobol32 generator.
610
+ *
611
+ * Return a single log-normally distributed double derived from a normal
612
+ * distribution with mean \p mean and standard deviation \p stddev
613
+ * from the Sobol32 generator in \p state,
614
+ * increment position of generator by one.
615
+ *
616
+ * The implementation uses the inverse cumulative distribution function
617
+ * to generate normally distributed results, and transforms them into
618
+ * log-normal distribution.
619
+ *
620
+ * \param state - Pointer to state to update
621
+ * \param mean - Mean of the related normal distribution
622
+ * \param stddev - Standard deviation of the related normal distribution
623
+ *
624
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
625
+ */
626
+ QUALIFIERS double curand_log_normal_double(curandStateSobol32_t *state, double mean, double stddev)
627
+ {
628
+ return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
629
+ }
630
+
631
+ /**
632
+ * \brief Return a log-normally distributed double from a scrambled Sobol32 generator.
633
+ *
634
+ * Return a single log-normally distributed double derived from a normal
635
+ * distribution with mean \p mean and standard deviation \p stddev
636
+ * from the scrambled Sobol32 generator in \p state,
637
+ * increment position of generator by one.
638
+ *
639
+ * The implementation uses the inverse cumulative distribution function
640
+ * to generate normally distributed results, and transforms them into
641
+ * log-normal distribution.
642
+ *
643
+ * \param state - Pointer to state to update
644
+ * \param mean - Mean of the related normal distribution
645
+ * \param stddev - Standard deviation of the related normal distribution
646
+ *
647
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
648
+ */
649
+ QUALIFIERS double curand_log_normal_double(curandStateScrambledSobol32_t *state, double mean, double stddev)
650
+ {
651
+ return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
652
+ }
653
+
654
+ /**
655
+ * \brief Return a log-normally distributed double from a Sobol64 generator.
656
+ *
657
+ * Return a single normally distributed double derived from a normal
658
+ * distribution with mean \p mean and standard deviation \p stddev
659
+ * from the Sobol64 generator in \p state,
660
+ * increment position of generator by one.
661
+ *
662
+ * The implementation uses the inverse cumulative distribution function
663
+ * to generate normally distributed results.
664
+ *
665
+ * \param state - Pointer to state to update
666
+ * \param mean - Mean of the related normal distribution
667
+ * \param stddev - Standard deviation of the related normal distribution
668
+ *
669
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
670
+ */
671
+ QUALIFIERS double curand_log_normal_double(curandStateSobol64_t *state, double mean, double stddev)
672
+ {
673
+ return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
674
+ }
675
+
676
+ /**
677
+ * \brief Return a log-normally distributed double from a scrambled Sobol64 generator.
678
+ *
679
+ * Return a single normally distributed double derived from a normal
680
+ * distribution with mean \p mean and standard deviation \p stddev
681
+ * from the scrambled Sobol64 generator in \p state,
682
+ * increment position of generator by one.
683
+ *
684
+ * The implementation uses the inverse cumulative distribution function
685
+ * to generate normally distributed results.
686
+ *
687
+ * \param state - Pointer to state to update
688
+ * \param mean - Mean of the related normal distribution
689
+ * \param stddev - Standard deviation of the related normal distribution
690
+ *
691
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
692
+ */
693
+ QUALIFIERS double curand_log_normal_double(curandStateScrambledSobol64_t *state, double mean, double stddev)
694
+ {
695
+ return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
696
+ }
697
+
698
+ #endif // !defined(CURAND_LOGNORMAL_H_)
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_mtgp32_host.h ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*
51
+ * curand_mtgp32_host.h
52
+ *
53
+ *
54
+ * MTGP32-11213
55
+ *
56
+ * Mersenne Twister RNG for the GPU
57
+ *
58
+ * The period of generated integers is 2<sup>11213</sup>-1.
59
+ *
60
+ * This code generates 32-bit unsigned integers, and
61
+ * single precision floating point numbers uniformly distributed
62
+ * in the range [1, 2). (float r; 1.0 <= r < 2.0)
63
+ */
64
+
65
+ /*
66
+ * Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
67
+ * University. All rights reserved.
68
+ * Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
69
+ * University and University of Tokyo. All rights reserved.
70
+ *
71
+ * Redistribution and use in source and binary forms, with or without
72
+ * modification, are permitted provided that the following conditions are
73
+ * met:
74
+ *
75
+ * * Redistributions of source code must retain the above copyright
76
+ * notice, this list of conditions and the following disclaimer.
77
+ * * Redistributions in binary form must reproduce the above
78
+ * copyright notice, this list of conditions and the following
79
+ * disclaimer in the documentation and/or other materials provided
80
+ * with the distribution.
81
+ * * Neither the name of the Hiroshima University nor the names of
82
+ * its contributors may be used to endorse or promote products
83
+ * derived from this software without specific prior written
84
+ * permission.
85
+ *
86
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
87
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
88
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
89
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
90
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
91
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
92
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
93
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
94
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
95
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
96
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97
+ */
98
+ #if !defined CURAND_MTGP32_HOST_H
99
+ #define CURAND_MTGP32_HOST_H
100
+
101
+ #if !defined(QUALIFIERS)
102
+ #define QUALIFIERS static inline __device__
103
+ #endif
104
+
105
+ #include <cuda_runtime.h>
106
+ #include <stdlib.h>
107
+ #include <memory.h>
108
+ #include <string.h>
109
+ #include "curand.h"
110
+ #include "curand_mtgp32.h"
111
+ #include "curand_mtgp32dc_p_11213.h"
112
+
113
+
114
+ /**
115
+ * \addtogroup DEVICE Device API
116
+ *
117
+ * @{
118
+ */
119
+
120
+ static const unsigned int non_zero = 0x4d544750; /* bytes spell "MTGP" in ASCII; written into the last state word by the init-by-array/str routines when that word would otherwise be zero */
121
+
122
+ /*
123
+ * This function represents a function used in the initialization
124
+ * by mtgp32_init_by_array() and mtgp32_init_by_str().
125
+ * @param[in] x 32-bit integer
126
+ * @return 32-bit integer
127
+ */
128
+ static __forceinline__ unsigned int ini_func1(unsigned int x) {
129
+ return (x ^ (x >> 27)) * (1664525);
130
+ }
131
+
132
+ /*
133
+ * This function represents a function used in the initialization
134
+ * by mtgp32_init_by_array() and mtgp32_init_by_str().
135
+ * @param[in] x 32-bit integer
136
+ * @return 32-bit integer
137
+ */
138
+ static __forceinline__ unsigned int ini_func2(unsigned int x) {
139
+ return (x ^ (x >> 27)) * (1566083941);
140
+ }
141
+
142
+ /*
143
+ * This function initializes the internal state array with a 32-bit
144
+ * integer seed. The allocated memory should be freed by calling
145
+ * mtgp32_free(). \b para should be one of the elements in the
146
+ * parameter table (mtgp32-param-ref.c).
147
+ *
148
+ * This function is call by cuda program, because cuda program uses
149
+ * another structure and another allocation method.
150
+ *
151
+ * @param[out] array MTGP internal status vector.
152
+ * @param[in] para parameter structure
153
+ * @param[in] seed a 32-bit integer used as the seed.
154
+ */
155
+ static __forceinline__ __host__
156
+ void mtgp32_init_state(unsigned int state[],
157
+ const mtgp32_params_fast_t *para, unsigned int seed) {
158
+ int i;
159
+ int size = para->mexp / 32 + 1;
160
+ unsigned int hidden_seed;
161
+ unsigned int tmp;
162
+ hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
163
+ tmp = hidden_seed;
164
+ tmp += tmp >> 16;
165
+ tmp += tmp >> 8;
166
+ memset(state, tmp & 0xff, sizeof(unsigned int) * size);
167
+ state[0] = seed;
168
+ state[1] = hidden_seed;
169
+ for (i = 1; i < size; i++) {
170
+ state[i] ^= (1812433253) * (state[i - 1] ^ (state[i - 1] >> 30)) + i;
171
+ }
172
+ }
173
+
174
+ /*
175
+ * This function initializes the internal state array
176
+ * with a 32-bit integer array. \b para should be one of the elements in
177
+ * the parameter table (mtgp32-param-ref.c).
178
+ *
179
+ * @param[out] mtgp32 MTGP structure.
180
+ * @param[in] para parameter structure
181
+ * @param[in] array a 32-bit integer array used as a seed.
182
+ * @param[in] length length of the array.
183
+ * @return CURAND_STATUS_SUCCESS
184
+ */
185
+ static __forceinline__ __host__
186
+ int mtgp32_init_by_array(unsigned int state[],
187
+ const mtgp32_params_fast_t *para,
188
+ unsigned int *array, int length) {
189
+ int i, j, count;
190
+ unsigned int r;
191
+ int lag;
192
+ int mid;
193
+ int size = para->mexp / 32 + 1;
194
+ unsigned int hidden_seed;
195
+ unsigned int tmp;
196
+
197
+ if (size >= 623) {
198
+ lag = 11;
199
+ } else if (size >= 68) {
200
+ lag = 7;
201
+ } else if (size >= 39) {
202
+ lag = 5;
203
+ } else {
204
+ lag = 3;
205
+ }
206
+ mid = (size - lag) / 2;
207
+
208
+ hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
209
+ tmp = hidden_seed;
210
+ tmp += tmp >> 16;
211
+ tmp += tmp >> 8;
212
+ memset(state, tmp & 0xff, sizeof(unsigned int) * size);
213
+ state[0] = hidden_seed;
214
+
215
+ if (length + 1 > size) {
216
+ count = length + 1;
217
+ } else {
218
+ count = size;
219
+ }
220
+ r = ini_func1(state[0] ^ state[mid] ^ state[size - 1]);
221
+ state[mid] += r;
222
+ r += length;
223
+ state[(mid + lag) % size] += r;
224
+ state[0] = r;
225
+ i = 1;
226
+ count--;
227
+ for (i = 1, j = 0; (j < count) && (j < length); j++) {
228
+ r = ini_func1(state[i] ^ state[(i + mid) % size]
229
+ ^ state[(i + size - 1) % size]);
230
+ state[(i + mid) % size] += r;
231
+ r += array[j] + i;
232
+ state[(i + mid + lag) % size] += r;
233
+ state[i] = r;
234
+ i = (i + 1) % size;
235
+ }
236
+ for (; j < count; j++) {
237
+ r = ini_func1(state[i] ^ state[(i + mid) % size]
238
+ ^ state[(i + size - 1) % size]);
239
+ state[(i + mid) % size] += r;
240
+ r += i;
241
+ state[(i + mid + lag) % size] += r;
242
+ state[i] = r;
243
+ i = (i + 1) % size;
244
+ }
245
+ for (j = 0; j < size; j++) {
246
+ r = ini_func2(state[i] + state[(i + mid) % size]
247
+ + state[(i + size - 1) % size]);
248
+ state[(i + mid) % size] ^= r;
249
+ r -= i;
250
+ state[(i + mid + lag) % size] ^= r;
251
+ state[i] = r;
252
+ i = (i + 1) % size;
253
+ }
254
+ if (state[size - 1] == 0) {
255
+ state[size - 1] = non_zero;
256
+ }
257
+ return 0;
258
+ }
259
+
260
+ /*
261
+ * This function initializes the internal state array
262
+ * with a character array. \b para should be one of the elements in
263
+ * the parameter table (mtgp32-param-ref.c).
264
+ * This is the same algorithm with mtgp32_init_by_array(), but hope to
265
+ * be more useful.
266
+ *
267
+ * @param[out] mtgp32 MTGP structure.
268
+ * @param[in] para parameter structure
269
+ * @param[in] array a character array used as a seed. (terminated by zero.)
270
+ * @return memory allocation result. if 0 then O.K.
271
+ */
272
+ static __forceinline__ __host__
273
+ int mtgp32_init_by_str(unsigned int state[],
274
+ const mtgp32_params_fast_t *para, unsigned char *array) {
275
+ int i, j, count;
276
+ unsigned int r;
277
+ int lag;
278
+ int mid;
279
+ int size = para->mexp / 32 + 1;
280
+ int length = (unsigned int)strlen((char *)array);
281
+ unsigned int hidden_seed;
282
+ unsigned int tmp;
283
+
284
+ if (size >= 623) {
285
+ lag = 11;
286
+ } else if (size >= 68) {
287
+ lag = 7;
288
+ } else if (size >= 39) {
289
+ lag = 5;
290
+ } else {
291
+ lag = 3;
292
+ }
293
+ mid = (size - lag) / 2;
294
+
295
+ hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
296
+ tmp = hidden_seed;
297
+ tmp += tmp >> 16;
298
+ tmp += tmp >> 8;
299
+ memset(state, tmp & 0xff, sizeof(unsigned int) * size);
300
+ state[0] = hidden_seed;
301
+
302
+ if (length + 1 > size) {
303
+ count = length + 1;
304
+ } else {
305
+ count = size;
306
+ }
307
+ r = ini_func1(state[0] ^ state[mid] ^ state[size - 1]);
308
+ state[mid] += r;
309
+ r += length;
310
+ state[(mid + lag) % size] += r;
311
+ state[0] = r;
312
+ i = 1;
313
+ count--;
314
+ for (i = 1, j = 0; (j < count) && (j < length); j++) {
315
+ r = ini_func1(state[i] ^ state[(i + mid) % size]
316
+ ^ state[(i + size - 1) % size]);
317
+ state[(i + mid) % size] += r;
318
+ r += array[j] + i;
319
+ state[(i + mid + lag) % size] += r;
320
+ state[i] = r;
321
+ i = (i + 1) % size;
322
+ }
323
+ for (; j < count; j++) {
324
+ r = ini_func1(state[i] ^ state[(i + mid) % size]
325
+ ^ state[(i + size - 1) % size]);
326
+ state[(i + mid) % size] += r;
327
+ r += i;
328
+ state[(i + mid + lag) % size] += r;
329
+ state[i] = r;
330
+ i = (i + 1) % size;
331
+ }
332
+ for (j = 0; j < size; j++) {
333
+ r = ini_func2(state[i] + state[(i + mid) % size]
334
+ + state[(i + size - 1) % size]);
335
+ state[(i + mid) % size] ^= r;
336
+ r -= i;
337
+ state[(i + mid + lag) % size] ^= r;
338
+ state[i] = r;
339
+ i = (i + 1) % size;
340
+ }
341
+ if (state[size - 1] == 0) {
342
+ state[size - 1] = non_zero;
343
+ }
344
+ return 0;
345
+ }
346
+
347
+ template<typename ParamsType>
348
+ static __forceinline__ __host__
349
+ curandStatus_t curandMakeMTGP32ConstantsImpl(const mtgp32_params_fast_t params[], ParamsType * p, const int block_num)
350
+ {
351
+ const int size1 = sizeof(unsigned int) * block_num;
352
+ const int size2 = sizeof(unsigned int) * block_num * TBL_SIZE;
353
+ unsigned int *h_pos_tbl;
354
+ unsigned int *h_sh1_tbl;
355
+ unsigned int *h_sh2_tbl;
356
+ unsigned int *h_param_tbl;
357
+ unsigned int *h_temper_tbl;
358
+ unsigned int *h_single_temper_tbl;
359
+ unsigned int *h_mask;
360
+ curandStatus_t status = CURAND_STATUS_SUCCESS;
361
+
362
+ h_pos_tbl = (unsigned int *)malloc(size1);
363
+ h_sh1_tbl = (unsigned int *)malloc(size1);
364
+ h_sh2_tbl = (unsigned int *)malloc(size1);
365
+ h_param_tbl = (unsigned int *)malloc(size2);
366
+ h_temper_tbl = (unsigned int *)malloc(size2);
367
+ h_single_temper_tbl = (unsigned int *)malloc(size2);
368
+ h_mask = (unsigned int *)malloc(sizeof(unsigned int));
369
+ if (h_pos_tbl == NULL
370
+ || h_sh1_tbl == NULL
371
+ || h_sh2_tbl == NULL
372
+ || h_param_tbl == NULL
373
+ || h_temper_tbl == NULL
374
+ || h_single_temper_tbl == NULL
375
+ || h_mask == NULL) {
376
+ if (h_pos_tbl != NULL) free(h_pos_tbl);
377
+ if (h_sh1_tbl != NULL) free(h_sh1_tbl);
378
+ if (h_sh2_tbl != NULL) free(h_sh2_tbl);
379
+ if (h_param_tbl != NULL) free(h_param_tbl);
380
+ if (h_temper_tbl != NULL) free(h_temper_tbl);
381
+ if (h_single_temper_tbl != NULL) free(h_single_temper_tbl);
382
+ if (h_mask != NULL) free(h_mask);
383
+ status = CURAND_STATUS_ALLOCATION_FAILED;
384
+ } else {
385
+
386
+ h_mask[0] = params[0].mask;
387
+ for (int i = 0; i < block_num; i++) {
388
+ h_pos_tbl[i] = params[i].pos;
389
+ h_sh1_tbl[i] = params[i].sh1;
390
+ h_sh2_tbl[i] = params[i].sh2;
391
+ for (int j = 0; j < TBL_SIZE; j++) {
392
+ h_param_tbl[i * TBL_SIZE + j] = params[i].tbl[j];
393
+ h_temper_tbl[i * TBL_SIZE + j] = params[i].tmp_tbl[j];
394
+ h_single_temper_tbl[i * TBL_SIZE + j] = params[i].flt_tmp_tbl[j];
395
+ }
396
+ }
397
+ if (cudaMemcpy( p->pos_tbl,
398
+ h_pos_tbl, size1, cudaMemcpyHostToDevice) != cudaSuccess)
399
+ {
400
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
401
+ } else
402
+ if (cudaMemcpy( p->sh1_tbl,
403
+ h_sh1_tbl, size1, cudaMemcpyHostToDevice) != cudaSuccess)
404
+ {
405
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
406
+ } else
407
+ if (cudaMemcpy( p->sh2_tbl,
408
+ h_sh2_tbl, size1, cudaMemcpyHostToDevice) != cudaSuccess)
409
+ {
410
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
411
+ } else
412
+ if (cudaMemcpy( p->param_tbl,
413
+ h_param_tbl, size2, cudaMemcpyHostToDevice) != cudaSuccess)
414
+ {
415
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
416
+ } else
417
+ if (cudaMemcpy( p->temper_tbl,
418
+ h_temper_tbl, size2, cudaMemcpyHostToDevice) != cudaSuccess)
419
+ {
420
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
421
+ } else
422
+ if (cudaMemcpy( p->single_temper_tbl,
423
+ h_single_temper_tbl, size2, cudaMemcpyHostToDevice) != cudaSuccess)
424
+ {
425
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
426
+ } else
427
+ if (cudaMemcpy( p->mask,
428
+ h_mask, sizeof(unsigned int), cudaMemcpyHostToDevice) != cudaSuccess)
429
+ {
430
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
431
+ }
432
+ }
433
+ if (h_pos_tbl != NULL) free(h_pos_tbl);
434
+ if (h_sh1_tbl != NULL) free(h_sh1_tbl);
435
+ if (h_sh2_tbl != NULL) free(h_sh2_tbl);
436
+ if (h_param_tbl != NULL) free(h_param_tbl);
437
+ if (h_temper_tbl != NULL) free(h_temper_tbl);
438
+ if (h_single_temper_tbl != NULL)free(h_single_temper_tbl);
439
+ if (h_mask != NULL) free(h_mask);
440
+ return status;
441
+ }
442
+
443
+ /**
444
+ * \brief Set up constant parameters for the mtgp32 generator
445
+ *
446
+ * This host-side helper function re-organizes CURAND_NUM_MTGP32_PARAMS sets of
447
+ * generator parameters for use by kernel functions and copies the
448
+ * result to the specified location in device memory.
449
+ *
450
+ * \param params - Pointer to an array of type mtgp32_params_fast_t in host memory
451
+ * \param p - pointer to a structure of type mtgp32_kernel_params_t in device memory.
452
+ *
453
+ * \return
454
+ * - CURAND_STATUS_ALLOCATION_FAILED if host memory could not be allocated
455
+ * - CURAND_STATUS_INITIALIZATION_FAILED if the copy to device memory failed
456
+ * - CURAND_STATUS_SUCCESS otherwise
457
+ */
458
+ static __forceinline__ __host__
459
+ curandStatus_t curandMakeMTGP32Constants(const mtgp32_params_fast_t params[], mtgp32_kernel_params_t * p)
460
+ {
461
+ return curandMakeMTGP32ConstantsImpl(params, p, CURAND_NUM_MTGP32_PARAMS);
462
+ }
463
+
464
+ /**
465
+ * \brief Set up initial states for the mtgp32 generator
466
+ *
467
+ * This host-side helper function initializes a number of states (one parameter set per state) for
468
+ * an mtgp32 generator. To accomplish this it allocates a state array in host memory,
469
+ * initializes that array, and copies the result to device memory.
470
+ *
471
+ * \param s - pointer to an array of states in device memory
472
+ * \param params - Pointer to an array of type mtgp32_params_fast_t in host memory
473
+ * \param k - pointer to a structure of type mtgp32_kernel_params_t in device memory
474
+ * \param n - number of parameter sets/states to initialize
475
+ * \param seed - seed value
476
+ *
477
+ * \return
478
+ * - CURAND_STATUS_ALLOCATION_FAILED if host memory state could not be allocated
479
+ * - CURAND_STATUS_INITIALIZATION_FAILED if the copy to device memory failed
480
+ * - CURAND_STATUS_SUCCESS otherwise
481
+ */
482
+ static __forceinline__ __host__
483
+ curandStatus_t CURANDAPI curandMakeMTGP32KernelState(curandStateMtgp32_t *s,
484
+ mtgp32_params_fast_t params[],
485
+ mtgp32_kernel_params_t *k,
486
+ int n,
487
+ unsigned long long seed)
488
+ {
489
+ int i;
490
+ curandStatus_t status = CURAND_STATUS_SUCCESS;
491
+ curandStateMtgp32_t *h_status =(curandStateMtgp32_t *) malloc(sizeof(curandStateMtgp32_t) * n);
492
+ if (h_status == NULL) {
493
+ status = CURAND_STATUS_ALLOCATION_FAILED;
494
+ } else {
495
+ seed = seed ^ (seed >> 32);
496
+ for (i = 0; i < n; i++) {
497
+ mtgp32_init_state(&(h_status[i].s[0]), &params[i],(unsigned int)seed + i + 1);
498
+ h_status[i].offset = 0;
499
+ h_status[i].pIdx = i;
500
+ h_status[i].k = k;
501
+ }
502
+ if (cudaMemcpy(s, h_status,
503
+ sizeof(curandStateMtgp32_t) * n,
504
+ cudaMemcpyHostToDevice) != cudaSuccess) {
505
+ status = CURAND_STATUS_INITIALIZATION_FAILED;
506
+ }
507
+ }
508
+ free(h_status);
509
+ return status;
510
+ }
511
+
512
+ /** @} */
513
+
514
+ #endif
515
+
516
+
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExt.h ADDED
@@ -0,0 +1,1668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ /** \file nvToolsExt.h
22
+ */
23
+
24
+ /* ========================================================================= */
25
+ /** \mainpage
26
+ * \tableofcontents
27
+ * \section INTRODUCTION Introduction
28
+ *
29
+ * The NVIDIA Tools Extension library is a set of functions that a
30
+ * developer can use to provide additional information to tools.
31
+ * The additional information is used by the tool to improve
32
+ * analysis and visualization of data.
33
+ *
34
+ * The library introduces close to zero overhead if no tool is
35
+ * attached to the application. The overhead when a tool is
36
+ * attached is specific to the tool.
37
+ *
38
+ * \section INITIALIZATION Initialization
39
+ *
40
+ * Typically the tool's library that plugs into NVTX is indirectly
41
+ * loaded via environmental properties that are platform specific.
42
+ * For some platforms or special cases, the user may instead be required
43
+ * to initialize explicitly. This can also
44
+ * be helpful to control when the API loads a tool's library instead
45
+ * of what would typically be the first function call to emit info.
46
+ * For these rare cases, see \ref INITIALIZATION for additional information.
47
+ *
48
+ * \section MARKERS_AND_RANGES Markers and Ranges
49
+ *
50
+ * Markers and ranges are used to describe events at a specific time (markers)
51
+ * or over a time span (ranges) during the execution of the application
52
+ * respectively.
53
+ *
54
+ * \subsection MARKERS Markers
55
+ *
56
+ * Markers denote specific moments in time.
57
+ *
58
+ *
59
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
60
+ * how to specify the domain.
61
+ *
62
+ * \subsection THREAD_RANGES Thread Ranges
63
+ *
64
+ * Thread ranges denote nested time ranges. Nesting is maintained per thread
65
+ * per domain and does not require any additional correlation mechanism. The
66
+ * duration of a thread range is defined by the corresponding pair of
67
+ * nvtxRangePush* to nvtxRangePop API calls.
68
+ *
69
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
70
+ * how to specify the domain.
71
+ *
72
+ * \subsection PROCESS_RANGES Process Ranges
73
+ *
74
+ * Process ranges denote a time span that can expose arbitrary concurrency, as
75
+ * opposed to thread ranges that only support nesting. In addition the range
76
+ * start event can happen on a different thread than the end marker. For the
77
+ * correlation of a start/end pair a unique correlation ID is used that is
78
+ * returned from the start API call and needs to be passed into the end API
79
+ * call.
80
+ *
81
+ * \subsection EVENT_ATTRIBUTES Event Attributes
82
+ *
83
+ * \ref MARKERS_AND_RANGES can be annotated with various attributes to provide
84
+ * additional information for an event or to guide the tool's visualization of
85
+ * the data. Each of the attributes is optional and if left unused the
86
+ * attributes fall back to a default value. The attributes include:
87
+ * - color
88
+ * - category
89
+ *
90
+ * To specify any attribute other than the text message, the \ref
91
+ * EVENT_ATTRIBUTE_STRUCTURE "Event Attribute Structure" must be used.
92
+ *
93
+ * \section DOMAINS Domains
94
+ *
95
+ * Domains enable developers to scope annotations. By default all events and
96
+ * annotations are in the default domain. Additional domains can be registered.
97
+ * This allows developers to scope markers, ranges, and resource names to
98
+ * avoid conflicts.
99
+ *
100
+ * The function ::nvtxDomainCreateA or ::nvtxDomainCreateW is used to create
101
+ * a named domain.
102
+ *
103
+ * Each domain maintains its own
104
+ * - categories
105
+ * - thread range stacks
106
+ * - registered strings
107
+ *
108
+ * The function ::nvtxDomainDestroy marks the end of the domain. Destroying
109
+ * a domain unregisters and destroys all objects associated with it such as
110
+ * registered strings, resource objects, named categories, and started ranges.
111
+ *
112
+ * \section RESOURCE_NAMING Resource Naming
113
+ *
114
+ * This section covers calls that allow annotating objects with user-provided
115
+ * names in order to allow for a better analysis of complex trace data. All of
116
+ * the functions take the handle or the ID of the object to name and the name.
117
+ * The functions can be called multiple times during the execution of an
118
+ * application, however, in that case it is implementation dependent which
119
+ * name will be reported by the tool.
120
+ *
121
+ * \subsection CATEGORY_NAMING Category Naming
122
+ *
123
+ * Some functions in this library support associating an integer category
124
+ * to enable filtering and sorting. The category naming functions allow
125
+ * the application to associate a user friendly name with the integer
126
+ * category. Support for domains has been added in NVTX_VERSION_2 to
127
+ * avoid collisions when domains are developed independently.
128
+ *
129
+ * \subsection RESOURCE_OBJECTS Resource Objects
130
+ *
131
+ * Resource objects are a generic mechanism for attaching data to an application
132
+ * resource. The identifier field makes the association to a pointer or handle,
133
+ * while the type field helps provide deeper understanding of the identifier as
134
+ * well as enabling differentiation in cases where handles generated by different
135
+ * APIs may collide. The resource object may also have an associated message to
136
+ * associate with the application resource, enabling further annotation of this
137
+ * object and how it is used.
138
+ *
139
+ * The resource object was introduced in NVTX_VERSION_2 to supersede existing naming
140
+ * functions and allow the application resource identified by those functions to be
141
+ * associated to a domain. The other naming functions are still supported for backward
142
+ * compatibility but will be associated only to the default domain.
143
+ *
144
+ * \subsection RESOURCE_NAMING_OS Resource Naming
145
+ *
146
+ * Some operating system resources creation APIs do not support providing a user friendly
147
+ * name, such as some OS thread creation APIs. This API supports resource naming
148
+ * both through resource objects and functions following the pattern
149
+ * nvtxName[RESOURCE_TYPE][A|W](identifier, name). Resource objects introduced in NVTX_VERSION 2
150
+ * supersede the other functions with a more general method of assigning names to OS resources,
151
+ * along with associating them to domains too. The older nvtxName* functions are only associated
152
+ * with the default domain.
153
+ * \section EXTENSIONS Optional Extensions
154
+ * Optional extensions will either appear within the existing sections they extend or appear
155
+ * in the "Related Pages" when they introduce new concepts.
156
+ */
157
+
158
+ /**
159
+ * Tools Extension API version
160
+ */
161
+ #if defined(NVTX_VERSION) && NVTX_VERSION < 3
162
+ #error "Trying to #include NVTX version 3 in a source file where an older NVTX version has already been included. If you are not directly using NVTX (the NVIDIA Tools Extension library), you are getting this error because libraries you are using have included different versions of NVTX. Suggested solutions are: (1) reorder #includes so the newest NVTX version is included first, (2) avoid using the conflicting libraries in the same .c/.cpp file, or (3) update the library using the older NVTX version to use the newer version instead."
163
+ #endif
164
+
165
+ #if defined(NVTX_AS_SYSTEM_HEADER) /* Opt-in: emit the compiler-specific pragma that marks this header as a system header */
166
+ #if defined(__clang__) /* Checked before __GNUC__ so Clang gets its own pragma spelling */
167
+ #pragma clang system_header
168
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
169
+ #pragma GCC system_header
170
+ #elif defined(_MSC_VER)
171
+ #pragma system_header
172
+ #endif /* Other compilers: no pragma is emitted */
173
+ #endif /* NVTX_AS_SYSTEM_HEADER */
174
+
175
+ /* Header guard */
176
+ #if !defined(NVTX_VERSION)
177
+ #define NVTX_VERSION 3
178
+
179
+ /* Platform-dependent defines:
180
+ *
181
+ * - NVTX_API - Calling conventions (only used on Windows, and only affects
182
+ * 32-bit x86 builds, i.e. callee pops stack instead of caller)
183
+ *
184
+ * - NVTX_DYNAMIC_EXPORT - Make function an exported entry point from a
185
+ * dynamic library or shared object.
186
+ *
187
+ * - NVTX_EXPORT_UNMANGLED_FUNCTION_NAME - When used inside the body of a
188
+ * function declared with NVTX_DYNAMIC_EXPORT, ensures the symbol exported
189
+ * for the function is the exact string of the function's name as written
190
+ * in the code. Name-mangling or name-decoration is disabled. Note that
191
+ * on many platforms this is not necessary, since either the function name
192
+ * is already exported verbatim, or the dynamic loader also checks for
193
+ * functions with the mangling applied. Forcing the exports to avoid any
194
+ * mangling simplifies usage across platforms and from other languages.
195
+ */
196
+ #if defined(_WIN32)
197
+
198
+ #define NVTX_API __stdcall
199
+
200
+ #if defined(_MSC_VER)
201
+ #define NVTX_DYNAMIC_EXPORT __declspec(dllexport)
202
+ #else
203
+ #define NVTX_DYNAMIC_EXPORT __attribute__((visibility("default"))) __declspec(dllexport)
204
+ #endif
205
+
206
+ #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_ARM64EC))
207
+ #define NVTX_EXPORT_UNMANGLED_FUNCTION_NAME _Pragma("comment(linker, \"/EXPORT:\" __FUNCTION__ \"=\" __FUNCDNAME__)")
208
+ #else
209
+ #define NVTX_EXPORT_UNMANGLED_FUNCTION_NAME
210
+ #endif
211
+
212
+ #else /* POSIX-like platform */
213
+
214
+ #define NVTX_API
215
+
216
+ #define NVTX_DYNAMIC_EXPORT __attribute__((visibility("default")))
217
+
218
+ #define NVTX_EXPORT_UNMANGLED_FUNCTION_NAME
219
+
220
+ #endif /* Platform-dependent defines */
221
+
222
+ /* Compiler-dependent defines:
223
+ *
224
+ * - NVTX_INLINE_STATIC - Ensure function has internal linkage, and suggest
225
+ * avoiding code-gen of the function. Without this, function has external
226
+ * linkage with a strong symbol, so linker expects only one definition.
227
+ */
228
+ #if defined(_MSC_VER)
229
+
230
+ #define NVTX_INLINE_STATIC __inline static
231
+
232
+ #else /* GCC-like compiler */
233
+
234
+ #if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
235
+ #define NVTX_INLINE_STATIC inline static
236
+ #else
237
+ #define NVTX_INLINE_STATIC __inline__ static
238
+ #endif
239
+
240
+ #endif /* Compiler-dependent defines */
241
+
242
+ #if !defined(NVTX_NULLPTR) /* Null pointer constant used by NVTX; a definition supplied before this point is kept */
243
+ #if defined(__cplusplus) && __cplusplus >= 201103L /* C++11 or later */
244
+ #define NVTX_NULLPTR nullptr
245
+ #else /* C, or C++ older than C++11 */
246
+ #define NVTX_NULLPTR NULL
247
+ #endif
248
+ #endif /* !defined(NVTX_NULLPTR) */
249
+
250
+ #if defined(__cplusplus) /* C++: cast helpers expand to the named C++ cast operators */
251
+ #define NVTX_STATIC_CAST(type, value) (static_cast<type>(value))
252
+ #define NVTX_REINTERPRET_CAST(type, value) (reinterpret_cast<type>(value))
253
+ #else /* C: both helpers expand to the same C-style cast */
254
+ #define NVTX_STATIC_CAST(type, value) ((type)(value))
255
+ #define NVTX_REINTERPRET_CAST(type, value) ((type)(value))
256
+ #endif /* NVTX_STATIC_CAST / NVTX_REINTERPRET_CAST */
257
+
258
+
259
+ /* API linkage/export options:
260
+ *
261
+ * - By default, the NVTX API functions are declared as "inline", with the
262
+ * implementations provided in the headers. This allows multiple .c/.cpp
263
+ * files in the same project to include NVTX headers without duplicate-
264
+ * definition linker errors. An optimizing compiler should inline these
265
+ * implementations, ensuring that the overhead of making an NVTX call is as
266
+ * low as possible, even without enabling link-time optimizations.
267
+ *
268
+ * - NVTX_NO_IMPL - Use when writing NVTX tools. If this macro is defined,
269
+ * the NVTX headers will provide all the typedefs, macros, and declarations
270
+ * of API functions (not marked inline), but no function implementations.
271
+ *
272
+ * - NVTX_EXPORT_API - NVTX is normally used in C/C++ applications by simply
273
+ * including the headers. There is no need to link with a static library,
274
+ * or to ship a dynamic library with the application (this was changed in
275
+ * NVTX v3). For other languages, it's not convenient to use a header-only
276
+ * C library. The best way to provide an idiomatic NVTX API for another
277
+ * language is a .c file that includes the NVTX headers and implements
278
+ * functions for that language using its native calling conventions and
279
+ * datatypes -- this method can allow static linking to avoid depending on
280
+ * a separate dynamic library. Alternatively, other languages may support
281
+ * using C calling conventions to directly call C functions exported from a
282
+ * dynamic library. To build such a library, write a .c file that defines
283
+ * NVTX_EXPORT_API and includes any/all of the NVTX headers. Compile this
284
+ * file as a dynamic library, and the NVTX API functions from the included
285
+ * headers will be exported with no name-mangling or decoration. Defining
286
+ * ABI-compatible NVTX struct and enum types in the other language is the
287
+ * responsibility of the user of this dynamic library.
288
+ *
289
+ * Whichever of the above modes is chosen, the following macros are defined
290
+ * appropriately below to implement that mode. These macros are only defined
291
+ * if not already defined by the user, so they may be overridden by users to
292
+ * handle advanced cases.
293
+ *
294
+ * - NVTX_DECLSPEC - Specify linkage for NVTX API functions.
295
+ *
296
+ * - NVTX_SET_NAME_MANGLING_OPTIONS - If necessary for the platform, will use
297
+ * platform-dependent syntax for ensuring function name is exported with no
298
+ * name-mangling or decoration. Certain compiler and calling-convention
299
+ * combinations will add name-mangling or decorations when exporting NVTX
300
+ * function name symbols, which makes it much harder for other languages
301
+ * to access these functions. This macro must be used inside a function's
302
+ * body because it uses built-in macros to get the current function's name.
303
+ */
304
+ #if defined(NVTX_NO_IMPL)
305
+
306
+ /* When omitting implementation, avoid declaring functions inline
307
+ * without definitions, since this causes compiler warnings. */
308
+ #if !defined(NVTX_DECLSPEC)
309
+ #define NVTX_DECLSPEC
310
+ #endif
311
+ #if !defined(NVTX_SET_NAME_MANGLING_OPTIONS)
312
+ #define NVTX_SET_NAME_MANGLING_OPTIONS
313
+ #endif
314
+
315
+ #elif defined(NVTX_EXPORT_API)
316
+
317
+ /* Add platform-dependent declaration syntax to ensure NVTX API functions are
318
+ * exported when compiling as a dynamic library/shared object, and ensure the
319
+ * exported names are not mangled/decorated. */
320
+ #if !defined(NVTX_DECLSPEC)
321
+ #define NVTX_DECLSPEC NVTX_DYNAMIC_EXPORT
322
+ #endif
323
+ #if !defined(NVTX_SET_NAME_MANGLING_OPTIONS)
324
+ #define NVTX_SET_NAME_MANGLING_OPTIONS NVTX_EXPORT_UNMANGLED_FUNCTION_NAME
325
+ #endif
326
+
327
+ #else /* Normal NVTX usage */
328
+
329
+ /* Function definitions are provided, and functions are declared inline to
330
+ * avoid duplicate-definition linker errors when using multiple source files. */
331
+ #if !defined(NVTX_DECLSPEC)
332
+ #define NVTX_DECLSPEC NVTX_INLINE_STATIC
333
+ #endif
334
+ #if !defined(NVTX_SET_NAME_MANGLING_OPTIONS)
335
+ #define NVTX_SET_NAME_MANGLING_OPTIONS
336
+ #endif
337
+
338
+ #endif
339
+
340
+ /* Platform-dependent helpers for defining global variables in header files.
341
+ * Ensures the linker uses only one instance when multiple source files include
342
+ * the headers, avoiding duplicate-definition linker errors. */
343
+ #include "nvtxDetail/nvtxLinkOnce.h"
344
+
345
+ /* Macros for applying major-version-specific suffix to NVTX global symbols, so
346
+ * usage of different versions in different source files is supported without
347
+ * violating the one-definition rule. */
348
+ #define NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) NAME##_v##VERSION /* Pastes NAME, _v and VERSION into a single identifier */
349
+ #define NVTX_VERSIONED_IDENTIFIER_L2(NAME, VERSION) NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) /* Extra level so VERSION is macro-expanded before it reaches the ## operator */
350
+ #define NVTX_VERSIONED_IDENTIFIER(NAME) NVTX_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION) /* Entry point: appends the current NVTX_VERSION, e.g. NAME_v3 when NVTX_VERSION is 3 */
351
+
352
+ /**
353
+ * The NVTX library depends on stdint.h. If the build tool chain in use
354
+ * does not include stdint.h, then define NVTX_STDINT_TYPES_ALREADY_DEFINED
355
+ * and define the following types:
356
+ * <ul>
357
+ * <li>uint8_t
358
+ * <li>int8_t
359
+ * <li>uint16_t
360
+ * <li>int16_t
361
+ * <li>uint32_t
362
+ * <li>int32_t
363
+ * <li>uint64_t
364
+ * <li>int64_t
365
+ * <li>uintptr_t
366
+ * <li>intptr_t
367
+ * </ul>
368
+ * Be sure to define NVTX_STDINT_TYPES_ALREADY_DEFINED if you are using your
369
+ * own definitions instead of stdint.h.
370
+ */
371
+ #ifndef NVTX_STDINT_TYPES_ALREADY_DEFINED
372
+ #include <stdint.h>
373
+ #endif
374
+
375
+ #include <stddef.h>
376
+
377
+ #ifdef __cplusplus
378
+ extern "C" {
379
+ #endif /* __cplusplus */
380
+
381
+ /**
382
+ * Result Codes used for the NVTX tool loader.
383
+ */
384
+ #define NVTX_SUCCESS 0
385
+ #define NVTX_FAIL 1
386
+ #define NVTX_ERR_INIT_LOAD_PROPERTY 2
387
+ #define NVTX_ERR_INIT_ACCESS_LIBRARY 3
388
+ #define NVTX_ERR_INIT_LOAD_LIBRARY 4
389
+ #define NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT 5
390
+ #define NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT 6
391
+ #define NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE 7
392
+
393
+ /**
394
+ * Size of the nvtxEventAttributes_t structure.
395
+ */
396
+ #define NVTX_EVENT_ATTRIB_STRUCT_SIZE (NVTX_STATIC_CAST(uint16_t, sizeof(nvtxEventAttributes_t)))
397
+
398
+ #define NVTX_NO_PUSH_POP_TRACKING (NVTX_STATIC_CAST(int, -2))
399
+
400
+ typedef uint64_t nvtxRangeId_t; /* Correlation ID returned by the range-start functions and passed to the matching range-end call */
401
+
402
+ /* Forward declaration of opaque domain registration structure */
403
+ struct nvtxDomainRegistration_st;
404
+ typedef struct nvtxDomainRegistration_st nvtxDomainRegistration;
405
+
406
+ /* \brief Domain Handle Structure.
407
+ * \anchor DOMAIN_HANDLE_STRUCTURE
408
+ *
409
+ * This structure is opaque to the user and is used as a handle to reference
410
+ * a domain. This type is returned from tools when using the NVTX API to
411
+ * create a domain.
412
+ *
413
+ */
414
+ typedef nvtxDomainRegistration* nvtxDomainHandle_t;
415
+
416
+ /* Forward declaration of opaque string registration structure */
417
+ struct nvtxStringRegistration_st;
418
+ typedef struct nvtxStringRegistration_st nvtxStringRegistration;
419
+
420
+ /* \brief Registered String Handle Structure.
421
+ * \anchor REGISTERED_STRING_HANDLE_STRUCTURE
422
+ *
423
+ * This structure is opaque to the user and is used as a handle to reference
424
+ * a registered string. This type is returned from tools when using the NVTX
425
+ * API to create a registered string.
426
+ *
427
+ */
428
+ typedef nvtxStringRegistration* nvtxStringHandle_t;
429
+
430
+ /* ========================================================================= */
431
+ /** \defgroup GENERAL General
432
+ * @{
433
+ */
434
+
435
+ /** ---------------------------------------------------------------------------
436
+ * Color Types
437
+ * ------------------------------------------------------------------------- */
438
+ typedef enum nvtxColorType_t
439
+ {
440
+ NVTX_COLOR_UNKNOWN = 0, /**< Color attribute is unused. */
441
+ NVTX_COLOR_ARGB = 1 /**< An ARGB color is provided. */
442
+ } nvtxColorType_t;
443
+
444
+ /** ---------------------------------------------------------------------------
445
+ * Message Types
446
+ * ------------------------------------------------------------------------- */
447
+ typedef enum nvtxMessageType_t
448
+ {
449
+ NVTX_MESSAGE_UNKNOWN = 0, /**< Message attribute is unused. */
450
+ NVTX_MESSAGE_TYPE_ASCII = 1, /**< A character sequence is used as payload. */
451
+ NVTX_MESSAGE_TYPE_UNICODE = 2, /**< A wide character sequence is used as payload. */
452
+ /* NVTX_VERSION_2 */
453
+ NVTX_MESSAGE_TYPE_REGISTERED = 3 /**< A unique string handle that was registered
454
+ with \ref nvtxDomainRegisterStringA() or
455
+ \ref nvtxDomainRegisterStringW(). */
456
+ } nvtxMessageType_t;
457
+
458
+ typedef union nvtxMessageValue_t /* Message value of an event; which member is meaningful is indicated by an nvtxMessageType_t value */
459
+ {
460
+ const char* ascii; /**< Character sequence; used with NVTX_MESSAGE_TYPE_ASCII. */
461
+ const wchar_t* unicode; /**< Wide character sequence; used with NVTX_MESSAGE_TYPE_UNICODE. */
462
+ /* NVTX_VERSION_2 */
463
+ nvtxStringHandle_t registered; /**< Registered string handle; used with NVTX_MESSAGE_TYPE_REGISTERED. */
464
+ } nvtxMessageValue_t;
465
+
466
+
467
+ /* ------------------------------------------------------------------------- */
468
+ /** \brief Force initialization (optional)
469
+ * \anchor FORCE_INITIALIZATION
470
+ *
471
+ * Force NVTX library to initialize. The first call to any NVTX API function
472
+ * will automatically initialize the entire API. This can make the first call
473
+ * much slower than subsequent calls. In applications where the first call to
474
+ * NVTX may be in a performance-critical section, calling nvtxInitialize before
475
+ * any performance-critical sections will ensure NVTX initialization occurs at
476
+ * an acceptable time. Since nvtxInitialize takes no parameters and has no
477
+ * expected behavior besides initialization, it is convenient to add a call to
478
+ * nvtxInitialize in NVTX-instrumented applications that need to force earlier
479
+ * initialization without changing any other code. For example, if an app's
480
+ * first NVTX call is nvtxDomainCreate, and it is difficult to move that call
481
+ * earlier because the domain handle must be stored in an object only created
482
+ * at that point, adding a call to nvtxInitialize at the top of main() will
483
+ * ensure the later call to nvtxDomainCreate is as fast as possible.
484
+ *
485
+ * \version NVTX_VERSION_3
486
+ *
487
+ * \param reserved - must be zero or NULL.
488
+ *
489
+ * @{ */
490
+ NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved);
491
+ /** @} */
492
+
493
+
494
+ /** @} */ /*END defgroup*/
495
+
496
+ /* ========================================================================= */
497
+ /** \defgroup EVENT_ATTRIBUTES Event Attributes
498
+ * @{
499
+ */
500
+
501
+ /** ---------------------------------------------------------------------------
502
+ * Payload Types
503
+ * ------------------------------------------------------------------------- */
504
+ typedef enum nvtxPayloadType_t
505
+ {
506
+ NVTX_PAYLOAD_UNKNOWN = 0, /**< Payload attribute is unused. */
507
+ NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 = 1, /**< A 64 bit unsigned integer value is used as payload. */
508
+ NVTX_PAYLOAD_TYPE_INT64 = 2, /**< A 64 bit signed integer value is used as payload. */
509
+ NVTX_PAYLOAD_TYPE_DOUBLE = 3, /**< A 64 bit floating point value is used as payload. */
510
+ /* NVTX_VERSION_2 */
511
+ NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 = 4, /**< A 32 bit unsigned integer value is used as payload. */
512
+ NVTX_PAYLOAD_TYPE_INT32 = 5, /**< A 32 bit signed integer value is used as payload. */
513
+ NVTX_PAYLOAD_TYPE_FLOAT = 6 /**< A 32 bit floating point value is used as payload. */
514
+ } nvtxPayloadType_t;
515
+
516
+ /** \brief Event Attribute Structure.
517
+ * \anchor EVENT_ATTRIBUTE_STRUCTURE
518
+ *
519
+ * This structure is used to describe the attributes of an event. The layout of
520
+ * the structure is defined by a specific version of the tools extension
521
+ * library and can change between different versions of the Tools Extension
522
+ * library.
523
+ *
524
+ * \par Guidelines
525
+ * The caller should always perform the following three tasks when using
526
+ * attributes:
527
+ * <ul>
528
+ * <li>Zero the structure
529
+ * <li>Set the version field
530
+ * <li>Set the size field
531
+ * </ul>
532
+ *
533
+ * Zeroing the structure sets all the event attributes types and values
534
+ * to the default value.
535
+ *
536
+ * The version and size field are used by the Tools Extension
537
+ * implementation to handle multiple versions of the attributes structure.
538
+ *
539
+ * It is recommended that the caller use one of the following two methods
540
+ * to initialize the event attributes structure:
541
+ *
542
+ * \par Method 1
543
+ * Initializing nvtxEventAttributes for future compatibility:
544
+ * \code
545
+ * nvtxEventAttributes_t eventAttrib = {0};
546
+ * eventAttrib.version = NVTX_VERSION;
547
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
548
+ * \endcode
549
+ *
550
+ * \par Method 2
551
+ * Initializing nvtxEventAttributes for a specific version:
552
+ * \code
553
+ * nvtxEventAttributes_t eventAttrib = {0};
554
+ * eventAttrib.version = 1;
555
+ * eventAttrib.size = (uint16_t)(sizeof(nvtxEventAttributes_v1));
556
+ * \endcode
557
+ *
558
+ * If the caller uses Method 1 it is critical that the entire binary
559
+ * layout of the structure be configured to 0 so that all fields
560
+ * are initialized to the default value.
561
+ *
562
+ * The caller should either use both NVTX_VERSION and
563
+ * NVTX_EVENT_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
564
+ * and a versioned type (Method 2). Using a mix of the two methods
565
+ * will likely cause either source level incompatibility or binary
566
+ * incompatibility in the future.
567
+ *
568
+ * \par Example
569
+ * Populate an attributes structure:
570
+ * \code
571
+ * // Initialize
572
+ * nvtxEventAttributes_t eventAttrib = {0};
573
+ * eventAttrib.version = NVTX_VERSION;
574
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
575
+ *
576
+ * // Configure the Attributes
577
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
578
+ * eventAttrib.color = 0xFF880000;
579
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
580
+ * eventAttrib.message.ascii = "Example";
581
+ * \endcode
582
+ *
583
+ * In the example the caller does not have to set the value of
584
+ * \ref ::nvtxEventAttributes_v2::category or
585
+ * \ref ::nvtxEventAttributes_v2::payload as these fields were set to
586
+ * the default value by {0}.
587
+ * \sa
588
+ * ::nvtxDomainMarkEx
589
+ * ::nvtxDomainRangeStartEx
590
+ * ::nvtxDomainRangePushEx
591
+ */
592
+ typedef struct nvtxEventAttributes_v2
593
+ {
594
+ /**
595
+ * \brief Version flag of the structure.
596
+ *
597
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
598
+ * supported in this header file. This can optionally be overridden to
599
+ * another version of the tools extension library.
600
+ */
601
+ uint16_t version;
602
+
603
+ /**
604
+ * \brief Size of the structure.
605
+ *
606
+ * Needs to be set to the size in bytes of the event attribute
607
+ * structure used to specify the event.
608
+ */
609
+ uint16_t size;
610
+
611
+ /**
612
+ * \brief ID of the category the event is assigned to.
613
+ *
614
+ * A category is a user-controlled ID that can be used to group
615
+ * events. The tool may use category IDs to improve filtering or
616
+ * enable grouping of events in the same category. The functions
617
+ * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
618
+ * to name a category.
619
+ *
620
+ * Default Value is 0
621
+ */
622
+ uint32_t category;
623
+
624
+ /** \brief Color type specified in this attribute structure.
625
+ *
626
+ * Defines the color format of the attribute structure's \ref COLOR_FIELD
627
+ * "color" field.
628
+ *
629
+ * Default Value is NVTX_COLOR_UNKNOWN
630
+ */
631
+ int32_t colorType; /* nvtxColorType_t */
632
+
633
+ /** \brief Color assigned to this event. \anchor COLOR_FIELD
634
+ *
635
+ * The color that the tool should use to visualize the event.
636
+ */
637
+ uint32_t color;
638
+
639
+ /**
640
+ * \brief Payload type specified in this attribute structure.
641
+ *
642
+ * Defines the payload format of the attribute structure's \ref PAYLOAD_FIELD
643
+ * "payload" field.
644
+ *
645
+ * Default Value is NVTX_PAYLOAD_UNKNOWN
646
+ */
647
+ int32_t payloadType; /* nvtxPayloadType_t */
648
+
649
+ int32_t reserved0;
650
+
651
+ /**
652
+ * \brief Payload assigned to this event. \anchor PAYLOAD_FIELD
653
+ *
654
+ * A numerical value that can be used to annotate an event. The tool could
655
+ * use the payload data to reconstruct graphs and diagrams.
656
+ */
657
+ union payload_t
658
+ {
659
+ uint64_t ullValue;
660
+ int64_t llValue;
661
+ double dValue;
662
+ /* NVTX_VERSION_2 */
663
+ uint32_t uiValue;
664
+ int32_t iValue;
665
+ float fValue;
666
+ } payload;
667
+
668
+ /** \brief Message type specified in this attribute structure.
669
+ *
670
+ * Defines the message format of the attribute structure's \ref MESSAGE_FIELD
671
+ * "message" field.
672
+ *
673
+ * Default Value is NVTX_MESSAGE_UNKNOWN
674
+ */
675
+ int32_t messageType; /* nvtxMessageType_t */
676
+
677
+ /** \brief Message assigned to this attribute structure. \anchor MESSAGE_FIELD
678
+ *
679
+ * The text message that is attached to an event.
680
+ */
681
+ nvtxMessageValue_t message;
682
+
683
+ } nvtxEventAttributes_v2;
684
+
685
+ typedef struct nvtxEventAttributes_v2 nvtxEventAttributes_t;
686
+
687
+ /** @} */ /*END defgroup*/
688
+ /* ========================================================================= */
689
+ /** \defgroup MARKERS_AND_RANGES Markers and Ranges
690
+ *
691
+ * See \ref MARKERS_AND_RANGES for more details
692
+ *
693
+ * @{
694
+ */
695
+
696
+ /** \name Marker */
697
+
698
+ /* ------------------------------------------------------------------------- */
699
+ /** \brief Marks an instantaneous event in the application.
700
+ *
701
+ * A marker can contain a text message or specify additional information
702
+ * using the event attributes structure. These attributes include a text
703
+ * message, color, category, and a payload. Each of the attributes is optional
704
+ * and can only be sent out using the \ref nvtxDomainMarkEx function.
705
+ *
706
+ * nvtxDomainMarkEx(NULL, event) is equivalent to calling
707
+ * nvtxMarkEx(event).
708
+ *
709
+ * \param domain - The domain in which the marker is scoped.
710
+ * \param eventAttrib - The event attribute structure defining the marker's
711
+ * attribute types and attribute values.
712
+ *
713
+ * \sa
714
+ * ::nvtxMarkEx
715
+ *
716
+ * \version NVTX_VERSION_2
717
+ * @{ */
718
+ NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
719
+ /** @} */
720
+
721
+ /* ------------------------------------------------------------------------- */
722
+ /** \brief Marks an instantaneous event in the application.
723
+ *
724
+ * A marker can contain a text message or specify additional information
725
+ * using the event attributes structure. These attributes include a text
726
+ * message, color, category, and a payload. Each of the attributes is optional
727
+ * and can only be sent out using the \ref nvtxMarkEx function.
728
+ * If \ref nvtxMarkA or \ref nvtxMarkW are used to specify the marker
729
+ * or if an attribute is unspecified then a default value will be used.
730
+ *
731
+ * \param eventAttrib - The event attribute structure defining the marker's
732
+ * attribute types and attribute values.
733
+ *
734
+ * \par Example
735
+ * Place a mark with attributes:
736
+ * \code
737
+ * // zero the structure
738
+ * nvtxEventAttributes_t eventAttrib = {0};
739
+ * // set the version and the size information
740
+ * eventAttrib.version = NVTX_VERSION;
741
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
742
+ * // configure the attributes. 0 is the default for all attributes.
743
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
744
+ * eventAttrib.color = 0xFF880000;
745
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
746
+ * eventAttrib.message.ascii = "Example nvtxMarkEx";
747
+ * nvtxMarkEx(&eventAttrib);
748
+ * \endcode
749
+ *
750
+ * \sa
751
+ * ::nvtxDomainMarkEx
752
+ *
753
+ * \version NVTX_VERSION_1
754
+ * @{ */
755
+ NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib);
756
+ /** @} */
757
+
758
+ /* ------------------------------------------------------------------------- */
759
+ /** \brief Marks an instantaneous event in the application.
760
+ *
761
+ * A marker created using \ref nvtxMarkA or \ref nvtxMarkW contains only a
762
+ * text message.
763
+ *
764
+ * \param message - The message associated to this marker event.
765
+ *
766
+ * \par Example
767
+ * Place a mark:
768
+ * \code
769
+ * nvtxMarkA("Example nvtxMarkA");
770
+ * nvtxMarkW(L"Example nvtxMarkW");
771
+ * \endcode
772
+ *
773
+ * \sa
774
+ * ::nvtxDomainMarkEx
775
+ * ::nvtxMarkEx
776
+ *
777
+ * \version NVTX_VERSION_0
778
+ * @{ */
779
+ NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message);
780
+ NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message);
781
+ /** @} */
782
+
783
+
784
+ /** \name Process Ranges */
785
+
786
+ /* ------------------------------------------------------------------------- */
787
+ /** \brief Starts a process range in a domain.
788
+ *
789
+ * \param domain - The domain in which the range is scoped.
790
+ * \param eventAttrib - The event attribute structure defining the range's
791
+ * attribute types and attribute values.
792
+ *
793
+ * \return The unique ID used to correlate a pair of Start and End events.
794
+ *
795
+ * \remarks Ranges defined by Start/End can overlap.
796
+ *
797
+ * \par Example
798
+ * Start a range with attributes for a domain:
799
+ * \code
800
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
801
+ * nvtxEventAttributes_t eventAttrib = {0};
802
+ * eventAttrib.version = NVTX_VERSION;
803
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
804
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
805
+ * eventAttrib.message.ascii = "my range";
806
+ * nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
807
+ * // ...
808
+ * nvtxDomainRangeEnd(domain, rangeId);
809
+ * \endcode
810
+ *
811
+ * \sa
812
+ * ::nvtxDomainRangeEnd
813
+ *
814
+ * \version NVTX_VERSION_2
815
+ * @{ */
816
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
817
+ /** @} */
818
+
819
+ /* ------------------------------------------------------------------------- */
820
+ /** \brief Starts a process range.
821
+ *
822
+ * \param eventAttrib - The event attribute structure defining the range's
823
+ * attribute types and attribute values.
824
+ *
825
+ * \return The unique ID used to correlate a pair of Start and End events.
826
+ *
827
+ * \remarks Ranges defined by Start/End can overlap.
828
+ *
829
+ * \par Example
830
+ * Start a range with attributes:
831
+ * \code
832
+ * nvtxEventAttributes_t eventAttrib = {0};
833
+ * eventAttrib.version = NVTX_VERSION;
834
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
835
+ * eventAttrib.category = 3;
836
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
837
+ * eventAttrib.color = 0xFF0088FF;
838
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
839
+ * eventAttrib.message.ascii = "Example Range";
840
+ * nvtxRangeId_t rangeId = nvtxRangeStartEx(&eventAttrib);
841
+ * // ...
842
+ * nvtxRangeEnd(rangeId);
843
+ * \endcode
844
+ *
845
+ * \sa
846
+ * ::nvtxRangeEnd
847
+ * ::nvtxDomainRangeStartEx
848
+ *
849
+ * \version NVTX_VERSION_1
850
+ * @{ */
851
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib);
852
+ /** @} */
853
+
854
+ /* ------------------------------------------------------------------------- */
855
+ /** \brief Starts a process range.
856
+ *
857
+ * \param message - The event message associated to this range event.
858
+ *
859
+ * \return The unique ID used to correlate a pair of Start and End events.
860
+ *
861
+ * \remarks Ranges defined by Start/End can overlap.
862
+ *
863
+ * \par Example
864
+ * Start a range:
865
+ * \code
866
+ * nvtxRangeId_t r1 = nvtxRangeStartA("Range 1");
867
+ * nvtxRangeId_t r2 = nvtxRangeStartW(L"Range 2");
868
+ * nvtxRangeEnd(r1);
869
+ * nvtxRangeEnd(r2);
870
+ * \endcode
871
+ *
872
+ * \sa
873
+ * ::nvtxRangeEnd
874
+ * ::nvtxRangeStartEx
875
+ * ::nvtxDomainRangeStartEx
876
+ *
877
+ * \version NVTX_VERSION_0
878
+ * @{ */
879
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message);
880
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message);
881
+ /** @} */
882
+
883
+ /* ------------------------------------------------------------------------- */
884
+ /** \brief Ends a process range.
885
+ *
886
+ * \param domain - The domain
887
+ * \param id - The correlation ID returned from an nvtxRangeStart call.
888
+ *
889
+ * \remarks This function is offered for completeness but is an alias for ::nvtxRangeEnd.
890
+ * It does not need a domain param since that is associated with the range ID at ::nvtxDomainRangeStartEx
891
+ *
892
+ * \par Example
893
+ * End a range for a domain:
894
+ * \code
895
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
896
+ * nvtxEventAttributes_t eventAttrib = {0};
897
+ * eventAttrib.version = NVTX_VERSION;
898
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
899
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
900
+ * eventAttrib.message.ascii = "my range";
901
+ * nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
902
+ * // ...
903
+ * nvtxDomainRangeEnd(domain, rangeId);
904
+ * \endcode
905
+ *
906
+ * \sa
907
+ * ::nvtxDomainRangeStartEx
908
+ *
909
+ * \version NVTX_VERSION_2
910
+ * @{ */
911
+ NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id);
912
+ /** @} */
913
+
914
+ /* ------------------------------------------------------------------------- */
915
+ /** \brief Ends a process range.
916
+ *
917
+ * \param id - The correlation ID returned from an nvtxRangeStart call.
918
+ *
919
+ * \sa
920
+ * ::nvtxDomainRangeStartEx
921
+ * ::nvtxRangeStartEx
922
+ * ::nvtxRangeStartA
923
+ * ::nvtxRangeStartW
924
+ *
925
+ * \version NVTX_VERSION_0
926
+ * @{ */
927
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id);
928
+ /** @} */
929
+
930
+ /** \name Thread Ranges */
931
+
932
+ /* ------------------------------------------------------------------------- */
933
+ /** \brief Starts a nested thread range.
934
+ *
935
+ * \param domain - The domain of scoping.
936
+ * \param eventAttrib - The event attribute structure defining the range's
937
+ * attribute types and attribute values.
938
+ *
939
+ * \return The 0 based level of range being started. This value is scoped to the domain.
940
+ * If an error occurs, a negative value is returned.
941
+ *
942
+ * \par Example
943
+ * Push a range with attributes for a domain:
944
+ * \code
945
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
946
+ * nvtxEventAttributes_t eventAttrib = {0};
947
+ * eventAttrib.version = NVTX_VERSION;
948
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
949
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
950
+ * eventAttrib.color = 0xFFFF0000;
951
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
952
+ * eventAttrib.message.ascii = "Level 0";
953
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
954
+ *
955
+ * // Re-use eventAttrib
956
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
957
+ * eventAttrib.message.unicode = L"Level 1";
958
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
959
+ *
960
+ * nvtxDomainRangePop(domain); // Level 1
961
+ * nvtxDomainRangePop(domain); // Level 0
962
+ * \endcode
963
+ *
964
+ * \sa
965
+ * ::nvtxDomainRangePop
966
+ *
967
+ * \version NVTX_VERSION_2
968
+ * @{ */
969
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
970
+ /** @} */
971
+
972
+ /* ------------------------------------------------------------------------- */
973
+ /** \brief Starts a nested thread range.
974
+ *
975
+ * \param eventAttrib - The event attribute structure defining the range's
976
+ * attribute types and attribute values.
977
+ *
978
+ * \return The 0 based level of range being started. This level is per domain.
979
+ * If an error occurs a negative value is returned.
980
+ *
981
+ * \par Example
982
+ * Push a range with attributes:
983
+ * \code
984
+ * nvtxEventAttributes_t eventAttrib = {0};
985
+ * eventAttrib.version = NVTX_VERSION;
986
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
987
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
988
+ * eventAttrib.color = 0xFFFF0000;
989
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
990
+ * eventAttrib.message.ascii = "Level 0";
991
+ * nvtxRangePushEx(&eventAttrib);
992
+ *
993
+ * // Re-use eventAttrib
994
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
995
+ * eventAttrib.message.unicode = L"Level 1";
996
+ * nvtxRangePushEx(&eventAttrib);
997
+ *
998
+ * nvtxRangePop(); // Level 1
999
+ * nvtxRangePop(); // Level 0
1000
+ * \endcode
1001
+ *
1002
+ * \sa
1003
+ * ::nvtxDomainRangePushEx
1004
+ * ::nvtxRangePop
1005
+ *
1006
+ * \version NVTX_VERSION_1
1007
+ * @{ */
1008
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib);
1009
+ /** @} */
1010
+
1011
+ /* ------------------------------------------------------------------------- */
1012
+ /** \brief Starts a nested thread range.
1013
+ *
1014
+ * \param message - The event message associated to this range event.
1015
+ *
1016
+ * \return The 0 based level of range being started. If an error occurs a
1017
+ * negative value is returned.
1018
+ *
1019
+ * \par Example
1020
+ * Push a range:
1021
+ * \code
1022
+ * nvtxRangePushA("Level 0");
1023
+ * nvtxRangePushW(L"Level 1");
1024
+ * nvtxRangePop(); // Level 1
1025
+ * nvtxRangePop(); // Level 0
1026
+ * \endcode
1027
+ *
1028
+ * \sa
1029
+ * ::nvtxDomainRangePushEx
1030
+ * ::nvtxRangePop
1031
+ *
1032
+ * \version NVTX_VERSION_0
1033
+ * @{ */
1034
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message);
1035
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message);
1036
+ /** @} */
1037
+
1038
+
1039
+ /* ------------------------------------------------------------------------- */
1040
+ /** \brief Ends a nested thread range.
1041
+ *
1042
+ * \return The level of the range being ended. If an error occurs a negative
1043
+ * value is returned on the current thread.
1044
+ *
1045
+ * \par Example
1046
+ * Pop a range for a domain:
1047
+ * \code
1048
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1049
+ * nvtxEventAttributes_t eventAttrib = {0};
1050
+ * eventAttrib.version = NVTX_VERSION;
1051
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1052
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
1053
+ * eventAttrib.color = 0xFFFF0000;
1054
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
1055
+ * eventAttrib.message.ascii = "Level 0";
1056
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
1057
+ *
1058
+ * // Re-use eventAttrib
1059
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
1060
+ * eventAttrib.message.unicode = L"Level 1";
1061
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
1062
+ *
1063
+ * nvtxDomainRangePop(domain); // Level 1
1064
+ * nvtxDomainRangePop(domain); // Level 0
1065
+ * \endcode
1066
+ *
1067
+ * \sa
1068
+ * ::nvtxRangePushEx
1069
+ * ::nvtxRangePushA
1070
+ * ::nvtxRangePushW
1071
+ *
1072
+ * \version NVTX_VERSION_2
1073
+ * @{ */
1074
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain);
1075
+ /** @} */
1076
+
1077
+ /* ------------------------------------------------------------------------- */
1078
+ /** \brief Ends a nested thread range.
1079
+ *
1080
+ * \return The level of the range being ended. If an error occurs a negative
1081
+ * value is returned on the current thread.
1082
+ *
1083
+ * \par Example
1084
+ * Pop a range:
1085
+ * \code
1086
+ * nvtxRangePushA("Level 0");
1087
+ * nvtxRangePushW(L"Level 1");
1088
+ * nvtxRangePop(); // Level 1
1089
+ * nvtxRangePop(); // Level 0
1090
+ * \endcode
1091
+ *
1092
+ * \sa
1093
+ * ::nvtxRangePushEx
1094
+ * ::nvtxRangePushA
1095
+ * ::nvtxRangePushW
1096
+ *
1097
+ * \version NVTX_VERSION_0
1098
+ * @{ */
1099
+ NVTX_DECLSPEC int NVTX_API nvtxRangePop(void);
1100
+ /** @} */
1101
+
1102
+
1103
+ /** @} */ /*END defgroup*/
1104
+ /* ========================================================================= */
1105
+ /** \defgroup RESOURCE_NAMING Resource Naming
1106
+ *
1107
+ * See \ref RESOURCE_NAMING for more details
1108
+ *
1109
+ * @{
1110
+ */
1111
+
1112
+
1113
+ /* ------------------------------------------------------------------------- */
1114
+ /** \name Functions for Generic Resource Naming*/
1115
+ /* ------------------------------------------------------------------------- */
1116
+
1117
+ /* ------------------------------------------------------------------------- */
1118
+ /** \cond SHOW_HIDDEN
1119
+ * \brief Resource typing helpers.
1120
+ *
1121
+ * Classes are used to make it easy to create a series of resource types
1122
+ * per API without collisions. NVTX_RESOURCE_MAKE_TYPE shifts the value of NVTX_RESOURCE_CLASS_<CLASS> left by 16 bits and ORs in INDEX (INDEX is not masked, so it is presumably expected to fit in the low 16 bits).
1123
+ */
1124
+ #define NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) (((NVTX_STATIC_CAST(uint32_t, NVTX_RESOURCE_CLASS_ ## CLASS))<<16)|(NVTX_STATIC_CAST(uint32_t, INDEX)))
1125
+ #define NVTX_RESOURCE_CLASS_GENERIC 1 /* class value selected by NVTX_RESOURCE_MAKE_TYPE(GENERIC, ...) */
1126
+ /** \endcond */
1127
+
1128
+ /* ------------------------------------------------------------------------- */
1129
+ /** \brief Generic resource type for when a resource class is not available.
1130
+ *
1131
+ * \sa
1132
+ * ::nvtxDomainResourceCreate
1133
+ *
1134
+ * \version NVTX_VERSION_2
1135
+ */
1136
+ typedef enum nvtxResourceGenericType_t
1137
+ {
1138
+ NVTX_RESOURCE_TYPE_UNKNOWN = 0, /**< Resource type not specified; this is the value a zero-initialized identifierType field holds. */
1139
+ NVTX_RESOURCE_TYPE_GENERIC_POINTER = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 1), /**< Generic pointer assumed to have no collisions with other pointers. */
1140
+ NVTX_RESOURCE_TYPE_GENERIC_HANDLE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 2), /**< Generic handle assumed to have no collisions with other handles. */
1141
+ NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 3), /**< OS native thread identifier. */
1142
+ NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 4) /**< POSIX pthread identifier. */
1143
+ } nvtxResourceGenericType_t;
1144
+
1145
+
1146
+
1147
+ /** \brief Resource Attribute Structure.
1148
+ * \anchor RESOURCE_ATTRIBUTE_STRUCTURE
1149
+ *
1150
+ * This structure is used to describe the attributes of a resource. The layout of
1151
+ * the structure is defined by a specific version of the tools extension
1152
+ * library and can change between different versions of the Tools Extension
1153
+ * library.
1154
+ *
1155
+ * \par Guidelines
1156
+ * The caller should always perform the following three tasks when using
1157
+ * attributes:
1158
+ * <ul>
1159
+ * <li>Zero the structure
1160
+ * <li>Set the version field
1161
+ * <li>Set the size field
1162
+ * </ul>
1163
+ *
1164
+ * Zeroing the structure sets all the resource attribute types and values
1165
+ * to the default value.
1166
+ *
1167
+ * The version and size field are used by the Tools Extension
1168
+ * implementation to handle multiple versions of the attributes structure.
1169
+ *
1170
+ * It is recommended that the caller use one of the following two methods
1171
+ * to initialize the resource attributes structure:
1172
+ *
1173
+ * \par Method 1
1174
+ * Initializing nvtxResourceAttributes for future compatibility:
1175
+ * \code
1176
+ * nvtxResourceAttributes_t attribs = {0};
1177
+ * attribs.version = NVTX_VERSION;
1178
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1179
+ * \endcode
1180
+ *
1181
+ * \par Method 2
1182
+ * Initializing nvtxResourceAttributes for a specific version:
1183
+ * \code
1184
+ * nvtxResourceAttributes_v0 attribs = {0};
1185
+ * attribs.version = 2;
1186
+ * attribs.size = (uint16_t)(sizeof(nvtxResourceAttributes_v0));
1187
+ * \endcode
1188
+ *
1189
+ * If the caller uses Method 1 it is critical that the entire binary
1190
+ * layout of the structure be configured to 0 so that all fields
1191
+ * are initialized to the default value.
1192
+ *
1193
+ * The caller should either use both NVTX_VERSION and
1194
+ * NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
1195
+ * and a versioned type (Method 2). Using a mix of the two methods
1196
+ * will likely cause either source level incompatibility or binary
1197
+ * incompatibility in the future.
1198
+ *
1199
+ * \par Example
1200
+ * Register a resource and populate its attributes:
1201
+ * \code
1202
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1203
+ *
1204
+ * // Initialize
1205
+ * nvtxResourceAttributes_t attribs = {0};
1206
+ * attribs.version = NVTX_VERSION;
1207
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1208
+ *
1209
+ * // Configure the Attributes
1210
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1211
+ * attribs.identifier.pValue = (const void*)pMutex;
1212
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1213
+ * attribs.message.ascii = "Single thread access to database.";
1214
+ *
1215
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
1216
+ * \endcode
1217
+ *
1218
+ * \sa
1219
+ * ::nvtxDomainResourceCreate
1220
+ */
1221
+ typedef struct nvtxResourceAttributes_v0
1222
+ {
1223
+ /**
1224
+ * \brief Version flag of the structure.
1225
+ *
1226
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
1227
+ * supported in this header file. This can optionally be overridden to
1228
+ * another version of the tools extension library.
1229
+ */
1230
+ uint16_t version;
1231
+
1232
+ /**
1233
+ * \brief Size of the structure.
1234
+ *
1235
+ * Needs to be set to the size in bytes of this attribute
1236
+ * structure (NVTX_RESOURCE_ATTRIB_STRUCT_SIZE yields sizeof(nvtxResourceAttributes_v0)).
1237
+ */
1238
+ uint16_t size;
1239
+
1240
+ /**
1241
+ * \brief Identifier type specifies how to interpret the identifier field
1242
+ *
1243
+ * Defines the identifier format of the attribute structure's \ref RESOURCE_IDENTIFIER_FIELD
1244
+ * "identifier" field.
1245
+ *
1246
+ * Default Value is NVTX_RESOURCE_TYPE_UNKNOWN
1247
+ */
1248
+ int32_t identifierType; /* values from enums following the pattern nvtxResource[name]Type_t */
1249
+
1250
+ /**
1251
+ * \brief Identifier for the resource.
1252
+ * \anchor RESOURCE_IDENTIFIER_FIELD
1253
+ *
1254
+ * An identifier may be a pointer or a handle to an OS or middleware API object.
1255
+ * The resource type will assist in avoiding collisions where handle values may collide.
1256
+ */
1257
+ union identifier_t
1258
+ {
1259
+ const void* pValue; /**< Identifier expressed as a pointer. */
1260
+ uint64_t ullValue; /**< Identifier expressed as an unsigned 64-bit value. */
1261
+ } identifier;
1262
+
1263
+ /** \brief Message type specified in this attribute structure.
1264
+ *
1265
+ * Defines the message format of the attribute structure's \ref RESOURCE_MESSAGE_FIELD
1266
+ * "message" field.
1267
+ *
1268
+ * Default Value is NVTX_MESSAGE_UNKNOWN
1269
+ */
1270
+ int32_t messageType; /* nvtxMessageType_t */
1271
+
1272
+ /** \brief Message assigned to this attribute structure. \anchor RESOURCE_MESSAGE_FIELD
1273
+ *
1274
+ * The text message that is attached to a resource.
1275
+ */
1276
+ nvtxMessageValue_t message;
1277
+
1278
+ } nvtxResourceAttributes_v0;
1279
+
1280
+ typedef struct nvtxResourceAttributes_v0 nvtxResourceAttributes_t;
1281
+
1282
+ /* \cond SHOW_HIDDEN
1283
+ * \version NVTX_VERSION_2
1284
+ */
1285
+ #define NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (NVTX_STATIC_CAST(uint16_t, sizeof(nvtxResourceAttributes_v0)))
1286
+ typedef struct nvtxResourceHandle* nvtxResourceHandle_t;
1287
+ /** \endcond */
1288
+
1289
+
1290
+
1291
+ /* ------------------------------------------------------------------------- */
1292
+ /** \brief Create a resource object to track and associate data with OS and middleware objects
1293
+ *
1294
+ * Allows users to associate an API handle or pointer with a user-provided name.
1295
+ *
1296
+ *
1297
+ * \param domain - Domain to own the resource object
1298
+ * \param attribs - Attributes to be associated with the resource
1299
+ *
1300
+ * \return A handle that represents the newly created resource object.
1301
+ *
1302
+ * \par Example
1303
+ * Register a resource:
1304
+ * \code
1305
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1306
+ * nvtxResourceAttributes_t attribs = {0};
1307
+ * attribs.version = NVTX_VERSION;
1308
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1309
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1310
+ * attribs.identifier.pValue = (const void*)pMutex;
1311
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1312
+ * attribs.message.ascii = "Single thread access to database.";
1313
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
1314
+ * \endcode
1315
+ *
1316
+ * \sa
1317
+ * ::nvtxResourceAttributes_t
1318
+ * ::nvtxDomainResourceDestroy
1319
+ *
1320
+ * \version NVTX_VERSION_2
1321
+ * @{ */
1322
+ NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
1323
+ /** @} */
1324
+
1325
+ /* ------------------------------------------------------------------------- */
1326
+ /** \brief Destroy a resource object to track and associate data with OS and middleware objects
1327
+ *
1328
+ * Allows users to associate an API handle or pointer with a user-provided name.
1329
+ *
1330
+ * \param resource - Handle to the resource on which to operate.
1331
+ *
1332
+ * \par Example
1333
+ * Unregister a resource:
1334
+ * \code
1335
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1336
+ * nvtxResourceAttributes_t attribs = {0};
1337
+ * attribs.version = NVTX_VERSION;
1338
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1339
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1340
+ * attribs.identifier.pValue = (const void*)pMutex;
1341
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1342
+ * attribs.message.ascii = "Single thread access to database.";
1343
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
1344
+ * // ...
1345
+ * nvtxDomainResourceDestroy(handle);
1346
+ * \endcode
1347
+ *
1348
+ * \sa
1349
+ * ::nvtxDomainResourceCreate
1350
+ *
1351
+ * \version NVTX_VERSION_2
1352
+ * @{ */
1353
+ NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource);
1354
+ /** @} */
1355
+
1356
+
1357
+ /** \name Functions for NVTX Category Naming*/
1358
+
1359
+ /* ------------------------------------------------------------------------- */
1360
+ /**
1361
+ * \brief Annotate an NVTX category used within a domain.
1362
+ *
1363
+ * Categories are used to group sets of events. Each category is identified
1364
+ * through a unique ID and that ID is passed into any of the marker/range
1365
+ * events to assign that event to a specific category. The nvtxDomainNameCategory
1366
+ * function calls allow the user to assign a name to a category ID that is
1367
+ * specific to the domain.
1368
+ *
1369
+ * nvtxDomainNameCategory(NULL, category, name) is equivalent to calling
1370
+ * nvtxNameCategory(category, name).
1371
+ *
1372
+ * \param domain - The domain of scoping the category.
1373
+ * \param category - The category ID to name.
1374
+ * \param name - The name of the category.
1375
+ *
1376
+ * \remarks The category names are tracked per domain.
1377
+ *
1378
+ * \par Example
1379
+ * Assign names to categories in a domain:
1380
+ * \code
1381
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example");
1382
+ * nvtxDomainNameCategoryA(domain, 1, "Memory Allocation");
1383
+ * nvtxDomainNameCategoryW(domain, 2, L"Memory Transfer");
1384
+ * \endcode
1385
+ *
1386
+ * \version NVTX_VERSION_2
1387
+ * @{ */
1388
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name);
1389
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
1390
+ /** @} */
1391
+
1392
+ /** \brief Annotate an NVTX category.
1393
+ *
1394
+ * Categories are used to group sets of events. Each category is identified
1395
+ * through a unique ID and that ID is passed into any of the marker/range
1396
+ * events to assign that event to a specific category. The nvtxNameCategory
1397
+ * function calls allow the user to assign a name to a category ID.
1398
+ *
1399
+ * \param category - The category ID to name.
1400
+ * \param name - The name of the category.
1401
+ *
1402
+ * \remarks The category names are tracked per process.
1403
+ *
1404
+ * \par Example
1405
+ * Assign names to categories:
1406
+ * \code
1407
+ * nvtxNameCategory(1, "Memory Allocation");
1408
+ * nvtxNameCategory(2, "Memory Transfer");
1409
+ * nvtxNameCategory(3, "Memory Object Lifetime");
1410
+ * \endcode
1411
+ *
1412
+ * \version NVTX_VERSION_1
1413
+ * @{ */
1414
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name);
1415
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name);
1416
+ /** @} */
1417
+
1418
+ /** \name Functions for OS Threads Naming*/
1419
+
1420
+ /* ------------------------------------------------------------------------- */
1421
+ /** \brief Annotate an OS thread.
1422
+ *
1423
+ * Allows the user to name an active thread of the current process. If an
1424
+ * invalid thread ID is provided or a thread ID from a different process is
1425
+ * used the behavior of the tool is implementation dependent.
1426
+ *
1427
+ * Tools expect thread ID to be a number that uniquely identifies the thread
1428
+ * at the time of the call. Note that a thread's ID can be reused after
1429
+ * it is destroyed. Tools may choose how to handle aliasing of thread IDs.
1430
+ *
1431
+ * POSIX pthread_t type returned by pthread_self() may not comply with these
1432
+ * expectations. Please use OS-specific thread ID instead of pthread_t.
1433
+ *
1434
+ * The thread name is associated to the default domain. To support domains
1435
+ * use resource objects via ::nvtxDomainResourceCreate.
1436
+ *
1437
+ * \param threadId - The ID of the thread to name.
1438
+ * \param name - The name of the thread.
1439
+ *
1440
+ * \par Examples
1441
+ * Name a thread based on the given operating system:
1442
+ *
1443
+ * Windows:
1444
+ * \code
1445
+ * #include <windows.h>
1446
+ * nvtxNameOsThread(GetCurrentThreadId(), "Current thread");
1447
+ * nvtxNameOsThread(GetThreadId(SomeThreadHandle), "Other thread");
1448
+ * \endcode
1449
+ *
1450
+ * Android:
1451
+ * \code
1452
+ * #include <unistd.h>
1453
+ * nvtxNameOsThreadA(gettid(), "Current thread");
1454
+ * nvtxNameOsThreadA(getpid(), "Main thread");
1455
+ * \endcode
1456
+ *
1457
+ * Linux:
1458
+ * \code
1459
+ * #include <sys/syscall.h>
1460
+ * nvtxNameOsThreadA(syscall(SYS_gettid), "Current thread");
1461
+ * \endcode
1462
+ * \code
1463
+ * #include <unistd.h>
1464
+ * nvtxNameOsThreadA(getpid(), "Main thread");
1465
+ * \endcode
1466
+ *
1467
+ * macOS:
1468
+ * \code
1469
+ * #include <sys/syscall.h>
1470
+ * nvtxNameOsThreadA(syscall(SYS_thread_selfid), "Current thread");
1471
+ * \endcode
1472
+ * \code
1473
+ * #include <pthread.h>
1474
+ * __uint64_t id;
1475
+ * pthread_threadid_np(pthread_self(), &id);
1476
+ * nvtxNameOsThreadA(id, "Current thread");
1477
+ * pthread_threadid_np(somePThreadId, &id);
1478
+ * nvtxNameOsThreadA(id, "Other thread");
1479
+ * \endcode
1480
+ *
1481
+ * \version NVTX_VERSION_1
1482
+ * @{ */
1483
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name);
1484
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name);
1485
+ /** @} */
1486
+
1487
+
1488
+ /** @} */ /*END defgroup*/
1489
+ /* ========================================================================= */
1490
+ /** \defgroup STRING_REGISTRATION String Registration
1491
+ *
1492
+ * Registered strings are intended to increase performance by lowering instrumentation
1493
+ * overhead. A string may be registered once and the handle may be passed in place of
1494
+ * a string wherever the APIs allow.
1495
+ *
1496
+ * See \ref STRING_REGISTRATION for more details
1497
+ *
1498
+ * @{
1499
+ */
1500
+
1501
+ /* ------------------------------------------------------------------------- */
1502
+ /** \brief Register a string.
1503
+
1504
+ * Registers an immutable string with NVTX. Once registered, the returned
1505
+ * handle can be used in place of the string in nvtxEventAttributes_t
1506
+ * \ref MESSAGE_FIELD. This allows NVTX implementation to skip copying the
1507
+ * contents of the message on each event invocation.
1508
+ *
1509
+ * String registration is an optimization. It is recommended to use string
1510
+ * registration if the string will be passed to an event many times.
1511
+ *
1512
+ * Strings are not unregistered, except by unregistering the entire domain.
1513
+ *
1514
+ * \param domain - Domain handle. If NULL then the global domain is used.
1515
+ * \param string - A unique pointer to a sequence of characters.
1516
+ *
1517
+ * \return A handle representing the registered string.
1518
+ *
1519
+ * \par Example
1520
+ * Register a string:
1521
+ * \code
1522
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1523
+ * nvtxStringHandle_t message = nvtxDomainRegisterStringA(domain, "registered string");
1524
+ * nvtxEventAttributes_t eventAttrib = {0};
1525
+ * eventAttrib.version = NVTX_VERSION;
1526
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1527
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
1528
+ * eventAttrib.message.registered = message;
1529
+ * \endcode
1530
+ *
1531
+ * \version NVTX_VERSION_2
1532
+ * @{ */
1533
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string);
1534
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string);
1535
+ /** @} */
1536
+
1537
+ /** @} */ /*END defgroup*/
1538
+ /* ========================================================================= */
1539
+ /** \defgroup DOMAINS Domains
1540
+ *
1541
+ * Domains are used to group events to a developer defined scope. Middleware
1542
+ * vendors may also scope their own events to avoid collisions with the
1543
+ * application developer's events, so that the application developer may
1544
+ * inspect both parts and easily differentiate or filter them. By default
1545
+ * all events are scoped to a global domain where NULL is provided or when
1546
+ * using APIs provided by versions of NVTX below v2.
1547
+ *
1548
+ * Domains are intended to be typically long lived objects with the intention
1549
+ * of logically separating events of large modules from each other such as
1550
+ * middleware libraries from each other and the main application.
1551
+ *
1552
+ * See \ref DOMAINS for more details
1553
+ *
1554
+ * @{
1555
+ */
1556
+
1557
+ /* ------------------------------------------------------------------------- */
1558
+ /** \brief Register a NVTX domain.
1559
+ *
1560
+ * Domains are used to scope annotations. All NVTX_VERSION_0 and NVTX_VERSION_1
1561
+ * annotations are scoped to the global domain. The function nvtxDomainCreate
1562
+ * creates a new named domain.
1563
+ *
1564
+ * Each domain maintains its own nvtxRangePush and nvtxRangePop stack.
1565
+ *
1566
+ * \param name - A unique string representing the domain.
1567
+ *
1568
+ * \return A handle representing the domain.
1569
+ *
1570
+ * \par Example
1571
+ * Create a domain:
1572
+ * \code
1573
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1574
+ *
1575
+ * nvtxMarkA("nvtxMarkA to global domain");
1576
+ *
1577
+ * nvtxEventAttributes_t eventAttrib1 = {0};
1578
+ * eventAttrib1.version = NVTX_VERSION;
1579
+ * eventAttrib1.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1580
+ * eventAttrib1.message.ascii = "nvtxDomainMarkEx to global domain";
1581
+ * nvtxDomainMarkEx(NULL, &eventAttrib1);
1582
+ *
1583
+ * nvtxEventAttributes_t eventAttrib2 = {0};
1584
+ * eventAttrib2.version = NVTX_VERSION;
1585
+ * eventAttrib2.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1586
+ * eventAttrib2.message.ascii = "nvtxDomainMarkEx to com.nvidia.nvtx.example";
1587
+ * nvtxDomainMarkEx(domain, &eventAttrib2);
1588
+ *
1589
+ * nvtxDomainDestroy(domain);
1590
+ * \endcode
1591
+ *
1592
+ * \sa
1593
+ * ::nvtxDomainDestroy
1594
+ *
1595
+ * \version NVTX_VERSION_2
1596
+ * @{ */
1597
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* name);
1598
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* name);
1599
+ /** @} */
1600
+
1601
+ /* ------------------------------------------------------------------------- */
1602
+ /** \brief Unregister a NVTX domain.
1603
+ *
1604
+ * Unregisters the domain handle and frees all domain specific resources.
1605
+ *
1606
+ * \param domain - the domain handle
1607
+ *
1608
+ * \par Example
1609
+ * Destroy a domain:
1610
+ * \code
1611
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1612
+ * // ...
1613
+ * nvtxDomainDestroy(domain);
1614
+ * \endcode
1615
+ *
1616
+ * \sa
1617
+ * ::nvtxDomainCreateA
1618
+ * ::nvtxDomainCreateW
1619
+ *
1620
+ * \version NVTX_VERSION_2
1621
+ * @{ */
1622
+ NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain);
1623
+ /** @} */
1624
+
1625
+
1626
+ /** @} */ /*END defgroup*/
1627
+ /* ========================================================================= */
1628
+ /** \cond SHOW_HIDDEN */
1629
+
1630
+ #ifdef UNICODE /* UNICODE defined: unsuffixed names alias the wchar_t (W) entry points */
1631
+ #define nvtxMark nvtxMarkW
1632
+ #define nvtxRangeStart nvtxRangeStartW
1633
+ #define nvtxRangePush nvtxRangePushW
1634
+ #define nvtxNameCategory nvtxNameCategoryW
1635
+ #define nvtxNameOsThread nvtxNameOsThreadW
1636
+ /* Aliases for functions documented as NVTX_VERSION_2 */
1637
+ #define nvtxDomainCreate nvtxDomainCreateW
1638
+ #define nvtxDomainRegisterString nvtxDomainRegisterStringW
1639
+ #define nvtxDomainNameCategory nvtxDomainNameCategoryW
1640
+ #else /* UNICODE not defined: unsuffixed names alias the char (A) entry points */
1641
+ #define nvtxMark nvtxMarkA
1642
+ #define nvtxRangeStart nvtxRangeStartA
1643
+ #define nvtxRangePush nvtxRangePushA
1644
+ #define nvtxNameCategory nvtxNameCategoryA
1645
+ #define nvtxNameOsThread nvtxNameOsThreadA
1646
+ /* Aliases for functions documented as NVTX_VERSION_2 */
1647
+ #define nvtxDomainCreate nvtxDomainCreateA
1648
+ #define nvtxDomainRegisterString nvtxDomainRegisterStringA
1649
+ #define nvtxDomainNameCategory nvtxDomainNameCategoryA
1650
+ #endif /* UNICODE */
1651
+
1652
+ /** \endcond */
1653
+
1654
+ #ifdef __cplusplus
1655
+ } /* extern "C" */
1656
+ #endif /* __cplusplus */
1657
+
1658
+ #define NVTX_IMPL_GUARD /* Ensure other headers cannot be included directly */
1659
+
1660
+ #include "nvtxDetail/nvtxTypes.h"
1661
+
1662
+ #ifndef NVTX_NO_IMPL
1663
+ #include "nvtxDetail/nvtxImpl.h"
1664
+ #endif /*NVTX_NO_IMPL*/
1665
+
1666
+ #undef NVTX_IMPL_GUARD
1667
+
1668
+ #endif /* !defined(NVTX_VERSION) */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCounters.h ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExtPayload.h"
32
+
33
+ /**
34
+ * \brief The compatibility ID is used for versioning of this extension.
35
+ */
36
+ #ifndef NVTX_EXT_COUNTERS_COMPATID
37
+ #define NVTX_EXT_COUNTERS_COMPATID 0x0102
38
+ #endif
39
+
40
+ /**
41
+ * \brief The module ID identifies the counters extension. It has to be unique
42
+ * among the extension modules.
43
+ */
44
+ #ifndef NVTX_EXT_COUNTERS_MODULEID
45
+ #define NVTX_EXT_COUNTERS_MODULEID 4
46
+ #endif
47
+
48
+ #ifndef NVTX_COUNTER_IDS_V1
49
+ #define NVTX_COUNTER_IDS_V1
50
+
51
+ /** The counter ID is not specified. */
52
+ #define NVTX_COUNTER_ID_NONE 0
53
+
54
+ /** Static (user-provided, feed-forward) counter (group) IDs. */
55
+ #define NVTX_COUNTER_ID_STATIC_START (1 << 24)
56
+
57
+ /** Dynamically (tool) generated counter (group) IDs */
58
+ #define NVTX_COUNTER_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
59
+
60
+ #endif /* NVTX_COUNTER_IDS_V1 */
61
+
62
+ /** Reasons for the missing sample value. */
63
+ #ifndef NVTX_COUNTER_SAMPLES_V1
64
+ #define NVTX_COUNTER_SAMPLES_V1
65
+
66
+ #define NVTX_COUNTER_SAMPLE_ZERO 0
67
+ #define NVTX_COUNTER_SAMPLE_UNCHANGED 1
68
+ #define NVTX_COUNTER_SAMPLE_UNAVAILABLE 2 /* Failed to get a counter sample. */
69
+
70
+ #endif /* NVTX_COUNTER_SAMPLES_V1 */
71
+
72
+ /**
73
+ * Counter batch timestamp array flags.
74
+ * Values must not overlap with `NVTX_BATCH_FLAG_*`.
75
+ * By default, one timestamp per sample is assumed.
76
+ */
77
+ #ifndef NVTX_COUNTER_BATCH_FLAGS_V1
78
+ #define NVTX_COUNTER_BATCH_FLAGS_V1
79
+
80
+ #define NVTX_COUNTER_BATCH_FLAG_BEGINTIME_INTERVAL_PAIR (1 << 32)
81
+ #define NVTX_COUNTER_BATCH_FLAG_ENDTIME_INTERVAL_PAIR (2 << 32)
82
+
83
+ #endif /* NVTX_COUNTER_BATCH_FLAGS_V1 */
84
+
85
+ #ifdef __cplusplus
86
+ extern "C" {
87
+ #endif /* __cplusplus */
88
+
89
+ #ifndef NVTX_COUNTER_TYPEDEFS_V1
90
+ #define NVTX_COUNTER_TYPEDEFS_V1
91
+
92
+ /**
93
+ * \brief Attributes of a counter or counter group.
94
+ */
95
+ typedef struct nvtxCounterAttr_v1
96
+ {
97
+ size_t structSize;
98
+
99
+ /**
100
+ * A schema ID referring to the data layout of the counter group or a
101
+ * predefined NVTX payloads number type.
102
+ */
103
+ uint64_t schemaId;
104
+
105
+ /** Name of the counter (group). */
106
+ const char* name;
107
+
108
+ /**
109
+ * Optional detailed description of the counter (group). A description for
110
+ * individual counters can be set in the schema registration.
111
+ */
112
+ const char* description;
113
+
114
+ /**
115
+ * Identifier of the counters' scope. A valid scope ID is either a
116
+ * predefined scope or the value returned by `nvtxScopeRegister` called for
117
+ * the same NVTX domain as `nvtxCounterRegister`. An invalid scope ID will
118
+ * be handled like `NVTX_SCOPE_NONE`.
119
+ */
120
+ uint64_t scopeId;
121
+
122
+ /**
123
+ * Optional semantics for a counter (group). The specified semantics apply
124
+ * to all counters in a group. If the semantics should only refer to a
125
+ * single counter in a group, the semantics field of the payload entry has
126
+ * to be used. Accepted semantics are `nvtxSemanticsCounter_t` and
127
+ * `nvtxSemanticsTime_t`.
128
+ */
129
+ const nvtxSemanticsHeader_t* semantics;
130
+
131
+ /**
132
+ * A static counter ID must be unique within the domain,
133
+ * >= NVTX_COUNTER_ID_STATIC_START, and < NVTX_COUNTER_ID_DYNAMIC_START.
134
+ * Use NVTX_COUNTER_ID_NONE to let the tool create a (dynamic) counter ID.
135
+ */
136
+ uint64_t counterId;
137
+ } nvtxCounterAttr_t;
138
+
139
+ /**
140
+ * \brief Helper struct to submit a batch of counters.
141
+ *
142
+ * The size of one sample is specified via the `payloadStaticSize` field of the
143
+ * counter's data layout schema or the size of the predefined payload entry type
144
+ * and must include padding. There should be no remainder when dividing
145
+ * `countersSize` by `nvtxPayloadSchemaAttr_t::payloadStaticSize`.
146
+ */
147
+ typedef struct nvtxCounterBatch_v1
148
+ {
149
+ /**
150
+ * Identifier of a counter group (data layout, scope, etc.). All counter
151
+ * samples in the batch have the same layout and size.
152
+ */
153
+ uint64_t counterId;
154
+
155
+ /** Batch of counter (group) samples. */
156
+ const void* counters;
157
+
158
+ /** Size of the counter batch (in bytes). */
159
+ size_t countersSize;
160
+
161
+ /**
162
+ * Timestamp ordering, timestamp style, etc.
163
+ * See `NVTX_BATCH_FLAG_*` and `NVTX_COUNTER_BATCH_FLAG_*`.
164
+ */
165
+ uint64_t flags;
166
+
167
+ /**
168
+ * Array of timestamps or a timestamp/interval pair. This field can be
169
+ * `NULL`, if timestamps are included in the counter samples as part of the
170
+ * counter group layout. By default, one timestamp per sample is assumed.
171
+ * The timestamp source is specified via time semantics passed during the
172
+ * counter group registration.
173
+ * This overrides the timestamps embedded in counter samples.
174
+ */
175
+ const int64_t* timestamps;
176
+
177
+ /** Size of the timestamps array or timestamp/interval pair (in bytes). */
178
+ size_t timestampsSize;
179
+ } nvtxCounterBatch_t;
180
+
181
+ #endif /* NVTX_COUNTER_TYPEDEFS_V1 */
182
+
183
+ #ifndef NVTX_COUNTER_API_FUNCTIONS_V1
184
+ #define NVTX_COUNTER_API_FUNCTIONS_V1
185
+
186
+ /**
187
+ * \brief Register a counter (group).
188
+ *
189
+ * @param hDomain NVTX domain handle.
190
+ * @param attr Pointer to the attributes of the counter (group).
191
+ *
192
+ * @return Identifier of a counter (group). The counter ID is unique within
193
+ * the NVTX domain.
194
+ */
195
+ NVTX_DECLSPEC uint64_t NVTX_API nvtxCounterRegister(
196
+ nvtxDomainHandle_t hDomain,
197
+ const nvtxCounterAttr_t* attr);
198
+
199
+ /**
200
+ * Sample one integer counter by value immediately
201
+ * (the NVTX tool determines the timestamp).
202
+ *
203
+ * @param hDomain handle of the NVTX domain.
204
+ * @param counterId identifier of the NVTX counter (group).
205
+ * @param value 64-bit integer counter value.
206
+ */
207
+ NVTX_DECLSPEC void NVTX_API nvtxCounterSampleInt64(
208
+ nvtxDomainHandle_t hDomain,
209
+ uint64_t counterId,
210
+ int64_t value);
211
+
212
+ /**
213
+ * Sample one floating point counter by value immediately
214
+ * (the NVTX tool determines the timestamp).
215
+ *
216
+ * @param hDomain handle of the NVTX domain.
217
+ * @param counterId identifier of the NVTX counter (group).
218
+ * @param value 64-bit floating-point counter value.
219
+ */
220
+ NVTX_DECLSPEC void NVTX_API nvtxCounterSampleFloat64(
221
+ nvtxDomainHandle_t hDomain,
222
+ uint64_t counterId,
223
+ double value);
224
+
225
+ /**
226
+ * Sample a counter (group) by reference immediately
227
+ * (the NVTX tool determines the timestamp).
228
+ *
229
+ * @param hDomain handle of the NVTX domain.
230
+ * @param counterId identifier of the NVTX counter (group).
231
+ * @param value pointer to one or more counter values.
232
+ * @param size size of the counter value(s) in bytes.
233
+ */
234
+ NVTX_DECLSPEC void NVTX_API nvtxCounterSample(
235
+ nvtxDomainHandle_t hDomain,
236
+ uint64_t counterId,
237
+ const void* value,
238
+ size_t size);
239
+
240
+ /**
241
+ * \brief Sample without value.
242
+ *
243
+ * @param hDomain handle of the NVTX domain.
244
+ * @param counterId identifier of the NVTX counter (group).
245
+ * @param reason reason for the missing sample value.
246
+ */
247
+ NVTX_DECLSPEC void NVTX_API nvtxCounterSampleNoValue(
248
+ nvtxDomainHandle_t hDomain,
249
+ uint64_t counterId,
250
+ uint8_t reason);
251
+
252
+ /**
253
+ * \brief Submit a batch of counters in the given domain.
254
+ *
255
+ * The size of a data sampling point is defined by the `payloadStaticSize` field
256
+ * of the payload schema. An NVTX tool can assume that the counter samples are
257
+ * stored as an array with each entry being `payloadStaticSize` bytes.
258
+ *
259
+ * @param hDomain handle of the NVTX domain
260
+ * @param counterData Pointer to the counter data to be submitted.
261
+ */
262
+ NVTX_DECLSPEC void NVTX_API nvtxCounterBatchSubmit(
263
+ nvtxDomainHandle_t hDomain,
264
+ const nvtxCounterBatch_t* counterData);
265
+
266
+ #endif /* NVTX_COUNTER_API_FUNCTIONS_V1 */
267
+
268
+ #ifndef NVTX_COUNTER_CALLBACK_ID_V1
269
+ #define NVTX_COUNTER_CALLBACK_ID_V1
270
+
271
+ #define NVTX3EXT_CBID_nvtxCounterRegister 0
272
+ #define NVTX3EXT_CBID_nvtxCounterSampleInt64 1
273
+ #define NVTX3EXT_CBID_nvtxCounterSampleFloat64 2
274
+ #define NVTX3EXT_CBID_nvtxCounterSample 3
275
+ #define NVTX3EXT_CBID_nvtxCounterSampleNoValue 4
276
+ #define NVTX3EXT_CBID_nvtxCounterBatchSubmit 5
277
+
278
+ #endif /* NVTX_COUNTER_CALLBACK_ID_V1 */
279
+
280
+ /* Macros to create versioned symbols. */
281
+ #ifndef NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIERS_V1
282
+ #define NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIERS_V1
283
+ #define NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \
284
+ NAME##_v##VERSION##_cnt##COMPATID
285
+ #define NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \
286
+ NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID)
287
+ #define NVTX_EXT_COUNTERS_VERSIONED_ID(NAME) \
288
+ NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_COUNTERS_COMPATID)
289
+ #endif /* NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIERS_V1 */
290
+
291
+ #ifdef __GNUC__
292
+ #pragma GCC visibility push(internal)
293
+ #endif
294
+
295
+ #define NVTX_EXT_TYPES_GUARD /* Ensure other headers cannot be included directly. */
296
+ #include "nvtxDetail/nvtxExtTypes.h"
297
+ #undef NVTX_EXT_TYPES_GUARD
298
+
299
+ #ifndef NVTX_NO_IMPL
300
+ #define NVTX_EXT_IMPL_COUNTERS_GUARD /* Ensure other headers cannot be included directly. */
301
+ #include "nvtxDetail/nvtxExtImplCounters_v1.h"
302
+ #undef NVTX_EXT_IMPL_COUNTERS_GUARD
303
+ #endif /*NVTX_NO_IMPL*/
304
+
305
+ #ifdef __GNUC__
306
+ #pragma GCC visibility pop
307
+ #endif
308
+
309
+ #ifdef __cplusplus
310
+ }
311
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCuda.h ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExt.h"
32
+
33
+ #include "cuda.h"
34
+
35
+ #ifndef NVTOOLSEXT_CUDA_V3
36
+ #define NVTOOLSEXT_CUDA_V3
37
+
38
+ #ifdef __cplusplus
39
+ extern "C" {
40
+ #endif /* __cplusplus */
41
+
42
+ /* ========================================================================= */
43
+ /** \name Functions for CUDA Resource Naming
44
+ */
45
+ /** \addtogroup RESOURCE_NAMING
46
+ * \section RESOURCE_NAMING_CUDA CUDA Resource Naming
47
+ *
48
+ * This section covers the API functions that allow annotating CUDA resources
49
+ * with user-provided names.
50
+ *
51
+ * @{
52
+ */
53
+
54
+ /* ------------------------------------------------------------------------- */
55
+ /* \cond SHOW_HIDDEN
56
+ * \brief Used to build a non-colliding value for resource types separated class
57
+ * \version NVTX_VERSION_2
58
+ */
59
+ #define NVTX_RESOURCE_CLASS_CUDA 4
60
+ /** \endcond */
61
+
62
+ /* ------------------------------------------------------------------------- */
63
+ /** \brief Resource types for CUDA
64
+ */
65
+ typedef enum nvtxResourceCUDAType_t
66
+ {
67
+ NVTX_RESOURCE_TYPE_CUDA_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDA, 1), /* CUdevice */
68
+ NVTX_RESOURCE_TYPE_CUDA_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 2), /* CUcontext */
69
+ NVTX_RESOURCE_TYPE_CUDA_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDA, 3), /* CUstream */
70
+ NVTX_RESOURCE_TYPE_CUDA_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 4) /* CUevent */
71
+ } nvtxResourceCUDAType_t;
72
+
73
+
74
+ /* ------------------------------------------------------------------------- */
75
+ /** \brief Annotates a CUDA device.
76
+ *
77
+ * Allows the user to associate a CUDA device with a user-provided name.
78
+ *
79
+ * \param device - The handle of the CUDA device to name.
80
+ * \param name - The name of the CUDA device.
81
+ *
82
+ * \version NVTX_VERSION_1
83
+ * @{ */
84
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name);
85
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name);
86
+ /** @} */
87
+
88
+ /* ------------------------------------------------------------------------- */
89
+ /** \brief Annotates a CUDA context.
90
+ *
91
+ * Allows the user to associate a CUDA context with a user-provided name.
92
+ *
93
+ * \param context - The handle of the CUDA context to name.
94
+ * \param name - The name of the CUDA context.
95
+ *
96
+ * \par Example
97
+ * Name a CUDA context:
98
+ * \code
99
+ * CUresult status = cuCtxCreate( &cuContext, 0, cuDevice );
100
+ * if ( CUDA_SUCCESS != status )
101
+ * goto Error;
102
+ * nvtxNameCuContext(cuContext, "CTX_NAME");
103
+ * \endcode
104
+ *
105
+ * \version NVTX_VERSION_1
106
+ * @{ */
107
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name);
108
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name);
109
+ /** @} */
110
+
111
+ /* ------------------------------------------------------------------------- */
112
+ /** \brief Annotates a CUDA stream.
113
+ *
114
+ * Allows the user to associate a CUDA stream with a user-provided name.
115
+ *
116
+ * \param stream - The handle of the CUDA stream to name.
117
+ * \param name - The name of the CUDA stream.
118
+ *
119
+ * \version NVTX_VERSION_1
120
+ * @{ */
121
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name);
122
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name);
123
+ /** @} */
124
+
125
+ /* ------------------------------------------------------------------------- */
126
+ /** \brief Annotates a CUDA event.
127
+ *
128
+ * Allows the user to associate a CUDA event with a user-provided name.
129
+ *
130
+ * \param event - The handle of the CUDA event to name.
131
+ * \param name - The name of the CUDA event.
132
+ *
133
+ * \version NVTX_VERSION_1
134
+ * @{ */
135
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name);
136
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name);
137
+ /** @} */
138
+
139
+ /** @} */ /* END RESOURCE_NAMING */
140
+
141
+ /* ========================================================================= */
142
+ #ifdef UNICODE
143
+ #define nvtxNameCuDevice nvtxNameCuDeviceW
144
+ #define nvtxNameCuContext nvtxNameCuContextW
145
+ #define nvtxNameCuStream nvtxNameCuStreamW
146
+ #define nvtxNameCuEvent nvtxNameCuEventW
147
+ #else
148
+ #define nvtxNameCuDevice nvtxNameCuDeviceA
149
+ #define nvtxNameCuContext nvtxNameCuContextA
150
+ #define nvtxNameCuStream nvtxNameCuStreamA
151
+ #define nvtxNameCuEvent nvtxNameCuEventA
152
+ #endif
153
+
154
+ #ifdef __cplusplus
155
+ }
156
+ #endif /* __cplusplus */
157
+
158
+ #ifndef NVTX_NO_IMPL
159
+ #define NVTX_IMPL_GUARD_CUDA /* Ensure other headers cannot be included directly */
160
+ #include "nvtxDetail/nvtxImplCuda_v3.h"
161
+ #undef NVTX_IMPL_GUARD_CUDA
162
+ #endif /*NVTX_NO_IMPL*/
163
+
164
+ #endif /* NVTOOLSEXT_CUDA_V3 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCudaRt.h ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExt.h"
32
+
33
+ #include "cuda.h"
34
+ #include "driver_types.h"
35
+
36
+ #ifndef NVTOOLSEXT_CUDART_V3
37
+ #define NVTOOLSEXT_CUDART_V3
38
+
39
+ #ifdef __cplusplus
40
+ extern "C" {
41
+ #endif /* __cplusplus */
42
+
43
+ /* ========================================================================= */
44
+ /** \name Functions for CUDA Resource Naming
45
+ */
46
+ /** \addtogroup RESOURCE_NAMING
47
+ * \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
48
+ *
49
+ * This section covers the API functions that allow annotating CUDA resources
50
+ * with user-provided names.
51
+ *
52
+ * @{
53
+ */
54
+
55
+ /* ------------------------------------------------------------------------- */
56
+ /* \cond SHOW_HIDDEN
57
+ * \brief Used to build a non-colliding value for resource types separated class
58
+ * \version NVTX_VERSION_2
59
+ */
60
+ #define NVTX_RESOURCE_CLASS_CUDART 5
61
+ /** \endcond */
62
+
63
+ /* ------------------------------------------------------------------------- */
64
+ /** \brief Resource types for CUDART
65
+ */
66
+ typedef enum nvtxResourceCUDARTType_t
67
+ {
68
+ NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /* int device */
69
+ NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /* cudaStream_t */
70
+ NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2) /* cudaEvent_t */
71
+ } nvtxResourceCUDARTType_t;
72
+
73
+
74
+ /* ------------------------------------------------------------------------- */
75
+ /** \brief Annotates a CUDA device.
76
+ *
77
+ * Allows the user to associate a CUDA device with a user-provided name.
78
+ *
79
+ * \param device - The id of the CUDA device to name.
80
+ * \param name - The name of the CUDA device.
81
+ *
82
+ * \version NVTX_VERSION_1
83
+ * @{ */
84
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
85
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
86
+ /** @} */
87
+
88
+ /* ------------------------------------------------------------------------- */
89
+ /** \brief Annotates a CUDA stream.
90
+ *
91
+ * Allows the user to associate a CUDA stream with a user-provided name.
92
+ *
93
+ * \param stream - The handle of the CUDA stream to name.
94
+ * \param name - The name of the CUDA stream.
95
+ *
96
+ * \version NVTX_VERSION_1
97
+ * @{ */
98
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
99
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
100
+ /** @} */
101
+
102
+ /* ------------------------------------------------------------------------- */
103
+ /** \brief Annotates a CUDA event.
104
+ *
105
+ * Allows the user to associate a CUDA event with a user-provided name.
106
+ *
107
+ * \param event - The handle of the CUDA event to name.
108
+ * \param name - The name of the CUDA event.
109
+ *
110
+ * \version NVTX_VERSION_1
111
+ * @{ */
112
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
113
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
114
+ /** @} */
115
+
116
+ /** @} */ /* END RESOURCE_NAMING */
117
+
118
+ /* ========================================================================= */
119
+ #ifdef UNICODE
120
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceW
121
+ #define nvtxNameCudaStream nvtxNameCudaStreamW
122
+ #define nvtxNameCudaEvent nvtxNameCudaEventW
123
+ #else
124
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceA
125
+ #define nvtxNameCudaStream nvtxNameCudaStreamA
126
+ #define nvtxNameCudaEvent nvtxNameCudaEventA
127
+ #endif
128
+
129
+ #ifdef __cplusplus
130
+ }
131
+ #endif /* __cplusplus */
132
+
133
+ #ifndef NVTX_NO_IMPL
134
+ #define NVTX_IMPL_GUARD_CUDART /* Ensure other headers cannot be included directly */
135
+ #include "nvtxDetail/nvtxImplCudaRt_v3.h"
136
+ #undef NVTX_IMPL_GUARD_CUDART
137
+ #endif /*NVTX_NO_IMPL*/
138
+
139
+ #endif /* NVTOOLSEXT_CUDART_V3 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMem.h ADDED
@@ -0,0 +1,749 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExt.h"
32
+
33
+ #ifdef __cplusplus
34
+ extern "C" {
35
+ #endif /* __cplusplus */
36
+
37
+ #ifndef NVTX_EXT_MODULEID_MEM
38
+ #define NVTX_EXT_MODULEID_MEM 1
39
+ #endif
40
+
41
+ /* \cond SHOW_HIDDEN
42
+ * \brief A compatibility ID value used in structures and initialization to
43
+ * identify version differences.
44
+ */
45
+ #ifndef NVTX_EXT_COMPATID_MEM
46
+ #define NVTX_EXT_COMPATID_MEM 0x0102
47
+ #endif
48
+ /* \endcond
49
+ */
50
+
51
+ #ifndef NVTX_MEM_CONTENTS_V1
52
+ #define NVTX_MEM_CONTENTS_V1
53
+
54
+ /* \cond SHOW_HIDDEN
55
+ * \brief This value is returned by functions that return `nvtxMemHeapHandle_t`,
56
+ * if a tool is not attached.
57
+ */
58
+ #define NVTX_MEM_HEAP_HANDLE_NO_TOOL (NVTX_STATIC_CAST(nvtxMemHeapHandle_t, NVTX_STATIC_CAST(intptr_t, -1)))
59
+ /* \endcond
60
+ */
61
+
62
+ /* \cond SHOW_HIDDEN
63
+ * \brief This value is returned by functions that return `nvtxMemRegionHandle_t`
64
+ * if a tool is not attached.
65
+ */
66
+ #define NVTX_MEM_REGION_HANDLE_NO_TOOL (NVTX_STATIC_CAST(nvtxMemRegionHandle_t, NVTX_STATIC_CAST(intptr_t, -1)))
67
+ /* \endcond
68
+ */
69
+
70
+ /* \cond SHOW_HIDDEN
71
+ * \brief This value is returned by functions that return `nvtxMemPermissionsHandle_t`
72
+ * if a tool is not attached.
73
+ */
74
+ #define NVTX_MEM_PERMISSIONS_HANDLE_NO_TOOL (NVTX_STATIC_CAST(nvtxMemPermissionsHandle_t, -1))
75
+ /* \endcond
76
+ */
77
+
78
+
79
+ /* \cond SHOW_HIDDEN
80
+ * \brief This should not be used and is considered an error but defined to
81
+ * detect an accidental use of zero or NULL.
82
+ */
83
+ #define NVTX_MEM_HEAP_USAGE_UNKNOWN 0x0
84
+ /* \endcond
85
+ */
86
+
87
+
88
+ /* \cond SHOW_HIDDEN
89
+ * \brief This should not be used and is considered an error but defined to
90
+ * detect an accidental use of zero or NULL.
91
+ */
92
+ #define NVTX_MEM_TYPE_UNKNOWN 0x0
93
+ /* \endcond
94
+ */
95
+
96
+
97
+ /* ------------------------------------------------------------------------- */
98
+ /** \defgroup MEMORY Memory
99
+ * See page \ref PAGE_MEMORY.
100
+ * @{
101
+ */
102
+
103
+ /**
104
+ * \brief To indicate the full process virtual address space as a heap for
105
+ * functions where a nvtxMemHeapHandle_t is accepted.
106
+ *
107
+ * By default, the heap always has read-write-execute permissions without creating regions.
108
+ * Regions created in this heap have read-write access by default but not execute.
109
+ */
110
+ #define NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemHeapHandle_t, 0))
111
+
112
+ /** \brief This heap is a sub-allocator.
113
+ *
114
+ * Heap created with this usage should not be accessed by the user until regions are registered.
115
+ * Regions from a heap with this usage have read-write access by default but not execute.
116
+ */
117
+ #define NVTX_MEM_HEAP_USAGE_TYPE_SUB_ALLOCATOR 0x1
118
+
119
+ /**
120
+ * \brief This is a heap of memory that has an explicit layout.
121
+ *
122
+ * The layout could be static or dynamic (calculated). This often represents an algorithm's
123
+ * structures that are packed together. By default this heap is assumed to be accessible for
124
+ * scopes where the memory is naturally accessible by hardware. Regions may be used to further
125
+ * annotate or restrict access. A tool may have an option to be more strict, but special
126
+ * consideration must be made for `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`.
127
+ *
128
+ * The behavior of this usage is similar to NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE but
129
+ * a tool can use it to track special behaviors and reservation.
130
+ *
131
+ * Memory in a heap with this usage has read-write permissions by default but not execute without
132
+ * creating regions. Regions created in this heap have the same default permission access.
133
+ */
134
+ #define NVTX_MEM_HEAP_USAGE_TYPE_LAYOUT 0x2
135
+
136
+
137
+ /**
138
+ * \brief Standard process userspace virtual addresses for linear allocations.
139
+ *
140
+ * APIs that map into this space, such as CUDA UVA should use this type.
141
+ *
142
+ * Relevant functions: cudaMalloc, cudaMallocManaged, cudaHostAlloc, cudaMallocHost
143
+ * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is supported
144
+ *
145
+ * nvtxMemHeapRegister receives a heapDesc of type nvtxMemVirtualRangeDesc_t
146
+ */
147
+ #define NVTX_MEM_TYPE_VIRTUAL_ADDRESS 0x1
148
+
149
+
150
+ /**
151
+ * \brief To indicate you are modifying permissions to the process-wide
152
+ * full virtual address space.
153
+ *
154
+ * This is a companion object to `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`.
155
+ */
156
+ #define NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemPermissionsHandle_t, 0))
157
+
158
+ #define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_NONE 0x0
159
+ #define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_READ 0x1
160
+ #define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE 0x2
161
+ #define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_ATOMIC 0x4
162
+
163
+
164
+ /* \cond SHOW_HIDDEN
165
+ * \brief Forward declaration of opaque memory heap structure.
166
+ */
167
+ struct nvtxMemHeap_v1;
168
+ typedef struct nvtxMemHeap_v1 nvtxMemHeap_t;
169
+ /* \endcond
170
+ */
171
+
172
+ /** \brief A handle returned by a tool to represent a memory heap. */
173
+ typedef nvtxMemHeap_t* nvtxMemHeapHandle_t;
174
+
175
+ /* \cond SHOW_HIDDEN
176
+ * \brief Forward declaration of opaque memory region structure.
177
+ */
178
+ struct nvtxMemRegion_v1;
179
+ typedef struct nvtxMemRegion_v1 nvtxMemRegion_t;
180
+ /* \endcond
181
+ */
182
+
183
+ /** \brief A handle returned by a tool to represent a memory region. */
184
+ typedef nvtxMemRegion_t* nvtxMemRegionHandle_t;
185
+
186
+ /** \brief A reference to a memory region (by pointer or handle).
187
+ * Which member of the union is valid is determined by a type or flag field outside the union.
188
+ */
189
+ typedef union nvtxMemRegionRef_t
190
+ {
191
+ void const* pointer;
192
+ nvtxMemRegionHandle_t handle;
193
+ } nvtxMemRegionRef_t;
194
+
195
+ /* \cond SHOW_HIDDEN
196
+ * \brief Forward declaration of opaque memory permissions structure
197
+ */
198
+ struct nvtxMemPermissions_v1;
199
+ typedef struct nvtxMemPermissions_v1 nvtxMemPermissions_t;
200
+ /* \endcond
201
+ */
202
+
203
+ /** \brief A handle returned by a tool to represent a memory permissions mask. */
204
+ typedef nvtxMemPermissions_t* nvtxMemPermissionsHandle_t;
205
+
206
+
207
+ typedef struct nvtxMemVirtualRangeDesc_v1
208
+ {
209
+ size_t size;
210
+ void const* ptr;
211
+ } nvtxMemVirtualRangeDesc_v1 ;
212
+ typedef nvtxMemVirtualRangeDesc_v1 nvtxMemVirtualRangeDesc_t;
213
+
214
+
215
+ /** \brief structure to describe a heap in process virtual memory. */
216
+ typedef struct nvtxMemHeapDesc_v1
217
+ {
218
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
219
+ uint16_t structSize; /* Size of the structure. */
220
+ uint32_t reserved0;
221
+
222
+ /** \brief Usage characteristics of the heap
223
+ *
224
+ * Usage characteristics help tools like memcheckers, sanitizers,
225
+ * as well as other debugging and profiling tools to determine some
226
+ * special behaviors they should apply to the heap and its regions.
227
+ * The value follows the convention NVTX_MEM_HEAP_USAGE_*
228
+ *
229
+ * Default Value is 0, which is invalid.
230
+ */
231
+ uint32_t usage;
232
+
233
+ /** \brief Memory type characteristics of the heap
234
+ *
235
+ * The 'type' indicates how to interpret the ptr field of the heapDesc.
236
+ * This is intended to support many additional types of memory, beyond
237
+ * standard process virtual memory, such as API specific memory only
238
+ * addressed by handles or multi-dimensional memory requiring more complex
239
+ * descriptions to handle features like strides, tiling, or interlace.
240
+ *
241
+ * The value conforms to NVTX_MEM_TYPE_*
242
+ *
243
+ * The value in the field 'type' identifies the descriptor type that will
244
+ * be in the field 'typeSpecificDesc'. 'typeSpecificDesc' is void* because
245
+ * it is extensible. Example usage is if type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS,
246
+ * then typeSpecificDesc points to a nvtxMemVirtualRangeDesc_t.
247
+ *
248
+ * Default Value is 0, which is invalid.
249
+ */
250
+ uint32_t type;
251
+
252
+ /** \brief size of the heap memory descriptor pointed to by typeSpecificDesc
253
+ *
254
+ * Default Value is 0 which is invalid.
255
+ */
256
+ size_t typeSpecificDescSize;
257
+
258
+ /** \brief Pointer to the heap memory descriptor
259
+ *
260
+ * The value in the field 'type' identifies the descriptor type that will
261
+ * be in the field 'typeSpecificDesc'. 'typeSpecificDesc' is void* because
262
+ * it is extensible. Example usage is if type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS,
263
+ * then typeSpecificDesc points to a nvtxMemVirtualRangeDesc_t.
264
+ *
265
+ * Default Value is 0, which is invalid.
266
+ */
267
+ void const* typeSpecificDesc;
268
+
269
+ /** \brief ID of the category the event is assigned to.
270
+ *
271
+ * A category is a user-controlled ID that can be used to group
272
+ * events. The tool may use category IDs to improve filtering or
273
+ * enable grouping of events in the same category. The functions
274
+ * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
275
+ * to name a category.
276
+ *
277
+ * Default Value is 0.
278
+ */
279
+ uint32_t category;
280
+
281
+ /** \brief Message type specified in this attribute structure.
282
+ *
283
+ * Defines the message format of the attribute structure's \ref MEM_MESSAGE_FIELD
284
+ * "message" field.
285
+ *
286
+ * Default Value is `NVTX_MESSAGE_UNKNOWN`.
287
+ */
288
+ uint32_t messageType; /* nvtxMessageType_t */
289
+
290
+ /** \brief Message assigned to this attribute structure. \anchor MEM_MESSAGE_FIELD
291
+ *
292
+ * The text message that is attached to an event.
293
+ */
294
+ nvtxMessageValue_t message;
295
+
296
+ } nvtxMemHeapDesc_v1 ;
297
+ typedef nvtxMemHeapDesc_v1 nvtxMemHeapDesc_t;
298
+
299
+ /**
300
+ * \brief Create a memory heap to represent an object or range of memory that will be further
301
+ * sub-divided into regions.
302
+ *
303
+ * The handle used to address the heap will depend on the heap's type. Where the heap is virtual
304
+ * memory accessible, the address of the heap's memory itself is its handle. This will likewise
305
+ * be returned from the function.
306
+ *
307
+ * For more advanced types, where the heap is not virtual memory accessible the tools may be
308
+ * responsible for returning a void const * that uniquely identifies the object. Please see
309
+ * the description of each heap type for more details on whether this is expected to be uniquely
310
+ * generated by the tool or otherwise.
311
+ */
312
+ NVTX_DECLSPEC nvtxMemHeapHandle_t NVTX_API nvtxMemHeapRegister(
313
+ nvtxDomainHandle_t domain,
314
+ nvtxMemHeapDesc_t const* desc);
315
+
316
+ /** \brief Destroy a memory heap. */
317
+ NVTX_DECLSPEC void NVTX_API nvtxMemHeapUnregister(
318
+ nvtxDomainHandle_t domain,
319
+ nvtxMemHeapHandle_t heap);/* NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported */
320
+
321
+ /**
322
+ * \brief Reset the memory heap, wiping out any changes, as if it were a fresh heap.
323
+ *
324
+ * This includes invalidating all regions and their handles.
325
+ */
326
+ NVTX_DECLSPEC void NVTX_API nvtxMemHeapReset(
327
+ nvtxDomainHandle_t domain,
328
+ nvtxMemHeapHandle_t heap); /* NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is supported */
329
+
330
+ /**
331
+ * \brief Register a region of memory inside of a heap.
332
+ *
333
+ * The heap refers to the heap within which the region resides. This can be from
334
+ * `nvtxMemHeapRegister`, `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`, or one provided
335
+ * from other extension API.
336
+ *
337
+ * The regionType arg will define which type is used in regionDescArray.
338
+ * The most commonly used type is `NVTX_MEM_TYPE_VIRTUAL_ADDRESS`.
339
+ * In this case regionDescElements is an array of `nvtxMemVirtualRangeDesc_t`.
340
+ *
341
+ * The regionCount arg is how many elements are in regionDescArray and regionHandleArrayOut.
342
+ *
343
+ * The regionHandleArrayOut arg points to an array where the tool will provide region handles. If
344
+ * a pointer is provided, it is expected to have regionCount elements. This pointer can be NULL if
345
+ * regionType is NVTX_MEM_TYPE_VIRTUAL_ADDRESS. In this case, the user can use the pointer to the
346
+ * virtual memory to reference the region in other related functions which accept nvtxMemRegionRef_t.
347
+ */
348
+ typedef struct nvtxMemRegionsRegisterBatch_v1
349
+ {
350
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
351
+ uint16_t structSize; /* Size of the structure. */
352
+
353
+ uint32_t regionType; /* NVTX_MEM_TYPE_* */
354
+
355
+ nvtxMemHeapHandle_t heap;
356
+
357
+ size_t regionCount;
358
+ size_t regionDescElementSize;
359
+ void const* regionDescElements; /* This will also become the handle for this region. */
360
+ nvtxMemRegionHandle_t* regionHandleElementsOut; /* This will also become the handle for this region. */
361
+
362
+ } nvtxMemRegionsRegisterBatch_v1;
363
+ typedef nvtxMemRegionsRegisterBatch_v1 nvtxMemRegionsRegisterBatch_t;
364
+
365
+ /** \brief Register a region of memory inside of a heap of linear process virtual memory
366
+ */
367
+ NVTX_DECLSPEC void NVTX_API nvtxMemRegionsRegister(
368
+ nvtxDomainHandle_t domain,
369
+ nvtxMemRegionsRegisterBatch_t const* desc);
370
+
371
+
372
+
373
+ /**
374
+ * \brief Register a region of memory inside of a heap.
375
+ *
376
+ * The heap refers to the heap within which the region resides.
377
+ * This can be from nvtxMemHeapRegister, NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE, or
378
+ * one provided from other extension API.
379
+ *
380
+ * The regionType arg will define which type is used in regionDescArray.
381
+ * The most commonly used type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS.
382
+ *
383
+ * The regionCount arg is how many elements are in regionDescArray and regionHandleArrayOut.
384
+ *
385
+ * The regionHandleArrayOut arg points to an array where the tool will provide region handles. If
386
+ * a pointer is provided, it is expected to have regionCount elements. This pointer can be NULL if
387
+ * regionType is NVTX_MEM_TYPE_VIRTUAL_ADDRESS. In this case, the user can use the pointer to the
388
+ * virtual memory to reference the region in other related functions which accept nvtxMemRegionRef_t.
389
+ */
390
+ typedef struct nvtxMemRegionsResizeBatch_v1
391
+ {
392
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
393
+ uint16_t structSize; /* Size of the structure. */
394
+
395
+ uint32_t regionType; /* NVTX_MEM_TYPE_* */
396
+
397
+ size_t regionDescCount;
398
+ size_t regionDescElementSize;
399
+ void const* regionDescElements; /* This will also become the handle for this region. */
400
+
401
+ } nvtxMemRegionsResizeBatch_v1;
402
+ typedef nvtxMemRegionsResizeBatch_v1 nvtxMemRegionsResizeBatch_t;
403
+
404
+ /** \brief Resize regions of memory inside of a heap of linear process virtual memory
405
+ */
406
+ NVTX_DECLSPEC void NVTX_API nvtxMemRegionsResize(
407
+ nvtxDomainHandle_t domain,
408
+ nvtxMemRegionsResizeBatch_t const* desc);
409
+
410
+
411
+ #define NVTX_MEM_REGION_REF_TYPE_UNKNOWN 0x0
412
+ #define NVTX_MEM_REGION_REF_TYPE_POINTER 0x1
413
+ #define NVTX_MEM_REGION_REF_TYPE_HANDLE 0x2
414
+
415
+ /**
416
+ * \brief Register a region of memory inside of a heap.
417
+ *
418
+ * The heap refers to the heap within which the region resides.
419
+ * This can be from nvtxMemHeapRegister, `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`, or
420
+ * one provided from other extension API.
421
+ *
422
+ * The regionType arg will define which type is used in `regionDescArray`.
423
+ * The most commonly used type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS.
424
+ *
425
+ * The regionCount arg is how many elements are in regionDescArray and regionHandleArrayOut.
426
+ *
427
+ * The regionHandleArrayOut arg points to an array where the tool will provide region handles.
428
+ * If a pointer is provided, it is expected to have regionCount elements.
429
+ * This pointer can be NULL if regionType is NVTX_MEM_TYPE_VIRTUAL_ADDRESS. In this case,
430
+ * the user can use the pointer to the virtual memory to reference the region in other
431
+ * related functions which accept a nvtxMemRegionRef_t.
432
+ */
433
+ typedef struct nvtxMemRegionsUnregisterBatch_v1
434
+ {
435
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
436
+ uint16_t structSize; /* Size of the structure. */
437
+
438
+ uint32_t refType; /* NVTX_MEM_REGION_REF_TYPE_* */
439
+
440
+ size_t refCount; /* count of elements in refArray */
441
+ size_t refElementSize;
442
+ nvtxMemRegionRef_t const* refElements; /* This will also become the handle for this region. */
443
+
444
+ } nvtxMemRegionsUnregisterBatch_v1;
445
+ typedef nvtxMemRegionsUnregisterBatch_v1 nvtxMemRegionsUnregisterBatch_t;
446
+
447
+ /**
448
+ * \brief Unregistration for regions of process virtual memory
449
+ *
450
+ * This is not necessary if the nvtx heap destroy function has been called that
451
+ * contains this object.
452
+ */
453
+ NVTX_DECLSPEC void NVTX_API nvtxMemRegionsUnregister(
454
+ nvtxDomainHandle_t domain,
455
+ nvtxMemRegionsUnregisterBatch_t const* desc);
456
+
457
+ typedef struct nvtxMemRegionNameDesc_v1
458
+ {
459
+ uint32_t regionRefType; /* NVTX_MEM_REGION_REF_TYPE_* */
460
+ uint32_t nameType; /* nvtxMessageType_t */
461
+
462
+ nvtxMemRegionRef_t region;
463
+ nvtxMessageValue_t name;
464
+
465
+ uint32_t category;
466
+ uint32_t reserved0;
467
+ } nvtxMemRegionNameDesc_v1;
468
+ typedef nvtxMemRegionNameDesc_v1 nvtxMemRegionNameDesc_t;
469
+
470
+
471
+ typedef struct nvtxMemRegionsNameBatch_v1
472
+ {
473
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
474
+ uint16_t structSize; /* Size of the structure. */
475
+
476
+ uint32_t reserved0;
477
+
478
+ size_t regionCount;
479
+ size_t regionElementSize;
480
+ nvtxMemRegionNameDesc_t const* regionElements;
481
+ size_t reserved1;
482
+ } nvtxMemRegionsNameBatch_v1 ;
483
+ typedef nvtxMemRegionsNameBatch_v1 nvtxMemRegionsNameBatch_t;
484
+
485
+
486
+ /** \brief Name or rename a region. */
487
+ NVTX_DECLSPEC void NVTX_API nvtxMemRegionsName(
488
+ nvtxDomainHandle_t domain,
489
+ nvtxMemRegionsNameBatch_t const* desc);
490
+
491
+ /** \brief There are no permissions for this memory. */
492
+ #define NVTX_MEM_PERMISSIONS_REGION_FLAGS_NONE 0x0
493
+
494
+ /** \brief The memory is readable. */
495
+ #define NVTX_MEM_PERMISSIONS_REGION_FLAGS_READ 0x1
496
+
497
+ /** \brief The memory is writable. */
498
+ #define NVTX_MEM_PERMISSIONS_REGION_FLAGS_WRITE 0x2
499
+
500
+ /** \brief The memory is for atomic RW. */
501
+ #define NVTX_MEM_PERMISSIONS_REGION_FLAGS_ATOMIC 0x4
502
+
503
+ /**
504
+ * \brief The memory access permissions are reset for a region.
505
+ *
506
+ * This is as if never set, rather than documented defaults. As a result, any flags
507
+ * indicating how unspecified regions are handled will affect this area.
508
+ *
509
+ * This should not be used with READ, WRITE, nor ATOMIC, as those flags would have no effect.
510
+ */
511
+ #define NVTX_MEM_PERMISSIONS_REGION_FLAGS_RESET 0x8
512
+
513
+
514
+ typedef struct nvtxMemPermissionsAssignRegionDesc_v1
515
+ {
516
+ uint32_t flags; /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */
517
+ uint32_t regionRefType; /* NVTX_MEM_REGION_REF_TYPE_* */
518
+ nvtxMemRegionRef_t region;
519
+
520
+ } nvtxMemPermissionsAssignRegionDesc_v1 ;
521
+ typedef nvtxMemPermissionsAssignRegionDesc_v1 nvtxMemPermissionsAssignRegionDesc_t;
522
+
523
+
524
+ typedef struct nvtxMemPermissionsAssignBatch_v1
525
+ {
526
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
527
+ uint16_t structSize; /* Size of the structure. */
528
+
529
+ uint32_t reserved0;
530
+
531
+ nvtxMemPermissionsHandle_t permissions;
532
+
533
+ size_t regionCount;
534
+ size_t regionElementSize;
535
+ nvtxMemPermissionsAssignRegionDesc_t const* regionElements;
536
+
537
+ size_t reserved1;
538
+ } nvtxMemPermissionsAssignBatch_v1 ;
539
+ typedef nvtxMemPermissionsAssignBatch_v1 nvtxMemPermissionsAssignBatch_t;
540
+
541
+
542
+ /** \brief Change the permissions of a region of process virtual memory. */
543
+ NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsAssign(
544
+ nvtxDomainHandle_t domain,
545
+ nvtxMemPermissionsAssignBatch_t const* desc);
546
+
547
+
548
+ /**
549
+ * \brief Create a permissions object for fine grain thread-local control in
550
+ * multi-threading scenarios
551
+ *
552
+ * Unlike the global permissions object (NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE), a new
553
+ * permissions object is empty. There are no regions registered to it, so more memory is accessible
554
+ * if bound(bind) without calls to nvtxMemPermissionsSetAccess* first. The permissions are not
555
+ * active until nvtxMemPermissionsBind. See `nvtxMemPermissionsBind` for more details.
556
+ *
557
+ * Use the flags NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_* to control how the regions in
558
+ * this permission object will interact with global permissions when bound. You may choose to
559
+ * either replace global memory regions setting or overlay on top of them. The most common uses are
560
+ * as follows:
561
+ * * To limit tools to validate writing exclusively specified in this object but inherit all
562
+ * global read access regions use `NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE`
563
+ * * To limit tools to validate both read & write permissions exclusively specified in this
564
+ * object use NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_READ
565
+ * & NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE
566
+ *
567
+ * Also see `nvtxMemPermissionsBind` & `nvtxMemPermissionsSetAccess*`.
568
+ */
569
+ NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemPermissionsCreate(
570
+ nvtxDomainHandle_t domain,
571
+ int32_t creationflags); /* NVTX_MEM_PERMISSIONS_CREATE_FLAGS_* */
572
+
573
+ /**
574
+ * \brief Destroy the permissions object.
575
+ *
576
+ * If bound(bind), destroy will also unbind it.
577
+ */
578
+ NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsDestroy(
579
+ nvtxDomainHandle_t domain,
580
+ nvtxMemPermissionsHandle_t permissionsHandle); /* only supported on objects from nvtxMemPermissionsCreate */
581
+
582
+ /** \brief Reset the permissions object back to its created state. */
583
+ NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsReset(
584
+ nvtxDomainHandle_t domain,
585
+ nvtxMemPermissionsHandle_t permissionsHandle);
586
+ /* NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE and other special handles are supported */
587
+
588
+
589
+ #define NVTX_MEM_PERMISSIONS_BIND_FLAGS_NONE 0x0
590
+
591
+ /** \brief Upon binding, with the thread, exclude parent scope write regions instead of overlaying on top of them.
592
+ *
593
+ * EX A developer may choose to first prevent all writes except the ones specified to avoid
594
+ * OOB writes, since there are typically fewer regions written to than read from.
595
+ **/
596
+ #define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_WRITE 0x2
597
+
598
+ /** \brief Upon binding, with the thread, exclude parent scope read regions instead of overlaying on top of them.
599
+ *
600
+ * EX After eliminating any errors when applying strict writes, a developer may then choose to
601
+ * annotate and enforce strict reads behaviors in segments of code.
602
+ **/
603
+ #define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_READ 0x1
604
+
605
+ /** \brief Upon binding, with the thread, exclude parent scope atomic RW regions instead of overlaying on top of them.
606
+ *
607
+ * EX After eliminating any errors from read and write, a developer may choose to ensure
608
+ * that atomics are in their own region, removing standard read/write, and replacing with
609
+ * this strict atomic only access. This way they know that conventional reads or writes
610
+ * will not cause unexpected issues.
611
+ **/
612
+ #define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_ATOMIC 0x4
613
+
614
+
615
+ #define NVTX_MEM_PERMISSIONS_BIND_SCOPE_UNKNOWN 0x0
616
+
617
+ /** \brief Bind to thread scope. In this case, tools should validate that local thread's
618
+ * execution is honoring the permissions as well as the state of NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE
619
+ * at the time of binding. If this is not bound then NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE should be
620
+ * used to validate the memory.
621
+ *
622
+ * Not all tools will support every scope, such as a GPU sanitizer.
623
+ **/
624
+ #define NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD 0x1
625
+
626
+ /**
627
+ * \brief Bind to CUDA stream scope.
628
+ *
629
+ * In this case, work enqueued to a CUDA stream should be validated by the tool,
630
+ * when it executes, that it respects the permissions of the permissions object at the point
631
+ * of binding, as well as the appropriate nvtxMemCudaGetDevicePermissions at the
632
+ * time of binding. If this is not bound then nvtxMemCudaGetDevicePermissions at
633
+ * the time of stream enqueue should be used to validate the memory.
634
+ *
635
+ * This could apply to work done either on the GPU like a kernel launch or to
636
+ * CPU based callbacks like cudaStreamAddCallback if the tool supports it.
637
+ *
638
+ * Binding is applied locally to a CPU thread so that if N CPU threads are enqueuing
639
+ * work to the same stream (like the default stream) that there cannot be a race
640
+ * condition between thread binding vs launching their work. IE users should
641
+ * expect the permissions bound in the thread to be honored by the subsequent
642
+ * work (launches, copies, etc) invoked from the CPU thread until unbound.
643
+ */
644
+ #define NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM 0x2
645
+
646
+
647
+ /**
648
+ * \brief Bind the permissions object into a particular scope on the caller thread
649
+ *
650
+ * Permissions do not take effect until binding. Binding permissions is a thread local
651
+ * activity that overrides global behaviors. This is to avoid multi-threaded race conditions.
652
+ *
653
+ * The scope dictates what type of processing it applies to, and when in some cases.
654
+ * EX1: NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD applies to CPU code accessing memory while bound.
655
+ * EX2: NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM applies to CUDA streams, and the permissions
656
+ * must be recorded and applied when the work in the stream dequeues to execute. In this case
657
+ * it could be GPU or CPU, if the tool supports both.
658
+ *
659
+ * Bind can be called again on the same object and thread to take any updates to the
660
+ * specified permission object or the inherited properties.
661
+ *
662
+ * Bind flags support changing how the binding process inherits region access control.
663
+ * In the case of thread scope this is NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE and from CUDA_STREAM
664
+ * this is nvtxMemCudaGetDevicePermissions. Choosing stricter modes allows the user to
665
+ * further reduce the access with less work, since memory by default, behaves as natural
666
+ * until the NVTX annotations instruct a tool to treat it another way. See strict flags
667
+ * for more details.
668
+ *
669
+ * Also see nvtxMemPermissionsUnbind
670
+ */
671
+ NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsBind(
672
+ nvtxDomainHandle_t domain,
673
+ nvtxMemPermissionsHandle_t permissions, /* special object like NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE are not supported */
674
+ uint32_t bindScope, /* NVTX_MEM_PERMISSIONS_BIND_SCOPE_* */
675
+ uint32_t bindFlags); /* NVTX_MEM_PERMISSIONS_BIND_FLAGS_* */
676
+
677
+ /**
678
+ * \brief Unbind the permissions object bound to the caller thread.
679
+ *
680
+ * Upon unbind, the thread local permissions for a scope are restored to the default
681
+ * behavior defined by the scope.
682
+ */
683
+ NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsUnbind(
684
+ nvtxDomainHandle_t domain,
685
+ uint32_t bindScope);
686
+
687
+ /** @} */
688
+
689
+ #endif /* NVTX_MEM_CONTENTS_V1 */
690
+
691
+ #ifndef NVTX_MEM_CALLBACK_ID_V1
692
+ #define NVTX_MEM_CALLBACK_ID_V1
693
+
694
+ #define NVTX3EXT_CBID_nvtxMemHeapRegister 0
695
+ #define NVTX3EXT_CBID_nvtxMemHeapUnregister 1
696
+ #define NVTX3EXT_CBID_nvtxMemHeapReset 2
697
+ #define NVTX3EXT_CBID_nvtxMemRegionsRegister 3
698
+ #define NVTX3EXT_CBID_nvtxMemRegionsResize 4
699
+ #define NVTX3EXT_CBID_nvtxMemRegionsUnregister 5
700
+ #define NVTX3EXT_CBID_nvtxMemRegionsName 6
701
+ #define NVTX3EXT_CBID_nvtxMemPermissionsAssign 7
702
+ #define NVTX3EXT_CBID_nvtxMemPermissionsCreate 8
703
+ #define NVTX3EXT_CBID_nvtxMemPermissionsDestroy 9
704
+ #define NVTX3EXT_CBID_nvtxMemPermissionsReset 10
705
+ #define NVTX3EXT_CBID_nvtxMemPermissionsBind 11
706
+ #define NVTX3EXT_CBID_nvtxMemPermissionsUnbind 12
707
+
708
+ /* 13-16 in nvtxExtImplMemCudaRt_v1.h */
709
+ #define NVTX3EXT_CBID_nvtxMemCudaGetProcessWidePermissions 13
710
+ #define NVTX3EXT_CBID_nvtxMemCudaGetDeviceWidePermissions 14
711
+ #define NVTX3EXT_CBID_nvtxMemCudaSetPeerAccess 15
712
+ #define NVTX3EXT_CBID_nvtxMemCudaMarkInitialized 16
713
+
714
+ #endif /* NVTX_MEM_CALLBACK_ID_V1 */
715
+
716
+ /* Macros to create versioned symbols. */
717
+ #ifndef NVTX_EXT_MEM_VERSIONED_IDENTIFIERS_V1
718
+ #define NVTX_EXT_MEM_VERSIONED_IDENTIFIERS_V1
719
+ #define NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \
720
+ NAME##_v##VERSION##_mem##COMPATID
721
+ #define NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \
722
+ NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID)
723
+ #define NVTX_EXT_MEM_VERSIONED_ID(NAME) \
724
+ NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_COMPATID_MEM)
725
+ #endif /* NVTX_EXT_MEM_VERSIONED_IDENTIFIERS_V1 */
726
+
727
+ #ifdef __GNUC__
728
+ #pragma GCC visibility push(internal)
729
+ #endif
730
+
731
+ /* Extension types are required for the implementation and the NVTX handler. */
732
+ #define NVTX_EXT_TYPES_GUARD /* Ensure other headers cannot be included directly */
733
+ #include "nvtxDetail/nvtxExtTypes.h"
734
+ #undef NVTX_EXT_TYPES_GUARD
735
+
736
+ #ifndef NVTX_NO_IMPL
737
+ /* Ensure other headers cannot be included directly */
738
+ #define NVTX_EXT_IMPL_MEM_GUARD
739
+ #include "nvtxDetail/nvtxExtImplMem_v1.h"
740
+ #undef NVTX_EXT_IMPL_MEM_GUARD
741
+ #endif /*NVTX_NO_IMPL*/
742
+
743
+ #ifdef __GNUC__
744
+ #pragma GCC visibility pop
745
+ #endif
746
+
747
+ #ifdef __cplusplus
748
+ }
749
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMemCudaRt.h ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExtMem.h"
32
+
33
+ #include "cuda.h"
34
+ #include "cuda_runtime.h"
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif /* __cplusplus */
39
+
40
+ #ifndef NVTX_MEM_CUDART_CONTENTS_V1
41
+ #define NVTX_MEM_CUDART_CONTENTS_V1
42
+
43
+ /** \defgroup MEMORY_CUDART Memory CUDA Runtime
44
+ * See page \ref PAGE_MEMORY_CUDART.
45
+ * @{
46
+ */
47
+
48
+ /** \brief The memory is from a CUDA runtime array.
49
+ *
50
+ * Relevant functions: cudaMallocArray, cudaMalloc3DArray
51
+ * Also cudaArray_t from other types such as cudaMipmappedArray_t
52
+ *
53
+ * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported
54
+ *
55
+ * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t because the description can be retrieved by tools through cudaArrayGetInfo()
56
+ * nvtxMemRegionRegisterEx receives a regionDesc of type nvtxMemCudaArrayRangeDesc_t
57
+ */
58
+ #define NVTX_MEM_TYPE_CUDA_ARRAY 0x11
59
+
60
+ /** \brief structure to describe memory in a CUDA array object
61
+ */
62
+ typedef struct nvtxMemCudaArrayRangeDesc_v1
63
+ {
64
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
65
+ uint16_t structSize; /* Size of the structure. */
66
+ uint32_t reserved0;
67
+ cudaArray_t src;
68
+ size_t offset[3];
69
+ size_t extent[3];
70
+ } nvtxMemCudaArrayRangeDesc_v1;
71
+ typedef nvtxMemCudaArrayRangeDesc_v1 nvtxMemCudaArrayRangeDesc_t;
72
+
73
+
74
+ /** \brief The memory is from a CUDA device array.
75
+ *
76
+ * Relevant functions: cuArrayCreate, cuArray3DCreate
77
+ * Also CUarray from other types such as CUmipmappedArray
78
+ *
79
+ * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported
80
+ *
81
+ * nvtxMemHeapRegister receives a heapDesc of type CUarray because the description can be retrieved by tools through cuArrayGetDescriptor()
82
+ * nvtxMemRegionRegisterEx receives a regionDesc of type nvtxMemCuArrayRangeDesc_t
83
+ */
84
+ #define NVTX_MEM_TYPE_CU_ARRAY 0x12
85
+
86
+ /** \brief structure to describe memory in a CUDA array object
87
+ */
88
+ typedef struct nvtxMemCuArrayRangeDesc_v1
89
+ {
90
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
91
+ uint16_t structSize; /* Size of the structure. */
92
+ uint32_t reserved0;
93
+ CUarray src;
94
+ size_t offset[3];
95
+ size_t extent[3];
96
+ } nvtxMemCuArrayRangeDesc_v1;
97
+ typedef nvtxMemCuArrayRangeDesc_v1 nvtxMemCuArrayRangeDesc_t;
98
+
99
+ /* Reserving 0x2-0xF for more common types */
100
+
101
+ #define NVTX_MEM_CUDA_PEER_ALL_DEVICES -1
102
+
103
+ /** \brief Get the permission object that represent the CUDA runtime device
104
+ * or cuda driver context
105
+ *
106
+ * This object will allow developers to adjust permissions applied to work executed
107
+ * on the GPU. It may be inherited or overridden by permissions object bound
108
+ * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags.
109
+ *
110
+ * Ex. change the peer to peer access permissions between devices in entirety
111
+ * or punch through special holes
112
+ *
113
+ * By default, all memory is accessible that naturally would be to a CUDA kernel until
114
+ * modified otherwise by nvtxMemCudaSetPeerAccess or changing regions.
115
+ *
116
+ * This object should also represent the CUDA driver API level context.
117
+ */
118
+ NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetProcessWidePermissions(
119
+ nvtxDomainHandle_t domain);
120
+
121
+ /** \brief Get the permission object that represent the CUDA runtime device
122
+ * or cuda driver context
123
+ *
124
+ * This object will allow developers to adjust permissions applied to work executed
125
+ * on the GPU. It may be inherited or overridden by permissions object bound
126
+ * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags.
127
+ *
128
+ * Ex. change the peer to peer access permissions between devices in entirety
129
+ * or punch through special holes
130
+ *
131
+ * By default, all memory is accessible that naturally would be to a CUDA kernel until
132
+ * modified otherwise by nvtxMemCudaSetPeerAccess or changing regions.
133
+ *
134
+ * This object should also represent the CUDA driver API level context.
135
+ */
136
+ NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetDeviceWidePermissions(
137
+ nvtxDomainHandle_t domain,
138
+ int device);
139
+
140
+ /** \brief Change the default behavior for all memory mapped in from a particular device.
141
+ *
142
+ * While typically all memory defaults to readable and writable, users may desire to limit
143
+ * access to reduced default permissions such as read-only on a per-device basis.
144
+ *
145
+ * Regions can be used to further override smaller windows of memory.
146
+ *
147
+ * devicePeer can be NVTX_MEM_CUDA_PEER_ALL_DEVICES
148
+ *
149
+ */
150
+ NVTX_DECLSPEC void NVTX_API nvtxMemCudaSetPeerAccess(
151
+ nvtxDomainHandle_t domain,
152
+ nvtxMemPermissionsHandle_t permissions,
153
+ int devicePeer, /* device number such as from cudaGetDevice() or NVTX_MEM_CUDA_PEER_ALL_DEVICES */
154
+ uint32_t flags); /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */
155
+
156
+ /** \brief Mark memory ranges as initialized.
157
+ *
158
+ * The heap refers to the heap within which the region resides.
159
+ * This can be from nvtxMemHeapRegister, NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE, or one provided from other extension API.
160
+ *
161
+ * The regionType arg will define which type is used in regionDescArray.
162
+ * The most commonly used type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS.
163
+ *
164
+ * The regionCount arg is how many elements are in regionDescArray and regionHandleArrayOut.
165
+ *
166
+ * The regionHandleArrayOut arg points to an array where the tool will provide region handles.
167
+ * If a pointer is provided, it is expected to have regionCount elements.
168
+ * This pointer can be NULL if regionType is NVTX_MEM_TYPE_VIRTUAL_ADDRESS. In this case,
169
+ * the user can use the pointer to the virtual memory to reference the region in other
170
+ * related functions which accept a nvtxMemRegionRef_t.
171
+ */
172
+ typedef struct nvtxMemMarkInitializedBatch_v1
173
+ {
174
+ uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
175
+ uint16_t structSize; /* Size of the structure. */
176
+
177
+ uint32_t regionType; /* NVTX_MEM_TYPE_* */
178
+
179
+ size_t regionDescCount;
180
+ size_t regionDescElementSize;
181
+ void const* regionDescElements; /* this will also become the handle for this region */
182
+
183
+ } nvtxMemMarkInitializedBatch_v1;
184
+ typedef nvtxMemMarkInitializedBatch_v1 nvtxMemMarkInitializedBatch_t;
185
+
186
+ /** \brief Mark memory ranges as initialized
187
+ *
188
+ * stream is the CUDA stream where the range was accessed and initialized.
189
+ */
190
+ NVTX_DECLSPEC void NVTX_API nvtxMemCudaMarkInitialized(
191
+ nvtxDomainHandle_t domain,
192
+ cudaStream_t stream,
193
+ uint8_t isPerThreadStream, /* 0 for false, otherwise true */
194
+ nvtxMemMarkInitializedBatch_t const* desc);
195
+
196
+ /** @} */
197
+
198
+ #endif /* NVTX_MEM_CUDART_CONTENTS_V1 */
199
+
200
+ #ifdef __GNUC__
201
+ #pragma GCC visibility push(internal)
202
+ #endif
203
+
204
+ #ifndef NVTX_NO_IMPL
205
+ #define NVTX_EXT_IMPL_MEM_CUDART_GUARD /* Ensure other headers cannot be included directly */
206
+ #include "nvtxDetail/nvtxExtImplMemCudaRt_v1.h"
207
+ #undef NVTX_EXT_IMPL_MEM_CUDART_GUARD
208
+ #endif /*NVTX_NO_IMPL*/
209
+
210
+ #ifdef __GNUC__
211
+ #pragma GCC visibility pop
212
+ #endif
213
+
214
+
215
+ #ifdef __cplusplus
216
+ }
217
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtOpenCL.h ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExt.h"
32
+
33
+ #include <CL/cl.h>
34
+
35
+ #ifndef NVTOOLSEXT_OPENCL_V3
36
+ #define NVTOOLSEXT_OPENCL_V3
37
+
38
+ #ifdef __cplusplus
39
+ extern "C" {
40
+ #endif /* __cplusplus */
41
+
42
+ /* ========================================================================= */
43
+ /** \name Functions for OpenCL Resource Naming
44
+ */
45
+ /** \addtogroup RESOURCE_NAMING
46
+ * \section RESOURCE_NAMING_OPENCL OpenCL Resource Naming
47
+ *
48
+ * This section covers the API functions that allow annotating OpenCL resources
49
+ * with user-provided names.
50
+ *
51
+ * @{
52
+ */
53
+
54
+ /* ------------------------------------------------------------------------- */
55
+ /* \cond SHOW_HIDDEN
56
+ * \brief Used to build a non-colliding value for resource types, separated by class
57
+ * \version NVTX_VERSION_2
58
+ */
59
+ #define NVTX_RESOURCE_CLASS_OPENCL 6
60
+ /** \endcond */
61
+
62
+ /* ------------------------------------------------------------------------- */
63
+ /** \brief Resource types for OpenCL
64
+ */
65
+ typedef enum nvtxResourceOpenCLType_t
66
+ {
67
+ NVTX_RESOURCE_TYPE_OPENCL_DEVICE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 1),
68
+ NVTX_RESOURCE_TYPE_OPENCL_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 2),
69
+ NVTX_RESOURCE_TYPE_OPENCL_COMMANDQUEUE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 3),
70
+ NVTX_RESOURCE_TYPE_OPENCL_MEMOBJECT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 4),
71
+ NVTX_RESOURCE_TYPE_OPENCL_SAMPLER = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 5),
72
+ NVTX_RESOURCE_TYPE_OPENCL_PROGRAM = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 6),
73
+ NVTX_RESOURCE_TYPE_OPENCL_EVENT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 7)
74
+ } nvtxResourceOpenCLType_t;
75
+
76
+
77
+ /* ------------------------------------------------------------------------- */
78
+ /** \brief Annotates an OpenCL device.
79
+ *
80
+ * Allows associating an OpenCL device with a user-provided name.
81
+ *
82
+ * \param device - The handle of the OpenCL device to name.
83
+ * \param name - The name of the OpenCL device.
84
+ *
85
+ * \version NVTX_VERSION_1
86
+ * @{ */
87
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name);
88
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name);
89
+ /** @} */
90
+
91
+ /* ------------------------------------------------------------------------- */
92
+ /** \brief Annotates an OpenCL context.
93
+ *
94
+ * Allows associating an OpenCL context with a user-provided name.
95
+ *
96
+ * \param context - The handle of the OpenCL context to name.
97
+ * \param name - The name of the OpenCL context.
98
+ *
99
+ * \version NVTX_VERSION_1
100
+ * @{ */
101
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name);
102
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name);
103
+ /** @} */
104
+
105
+ /* ------------------------------------------------------------------------- */
106
+ /** \brief Annotates an OpenCL command queue.
107
+ *
108
+ * Allows associating an OpenCL command queue with a user-provided name.
109
+ *
110
+ * \param command_queue - The handle of the OpenCL command queue to name.
111
+ * \param name - The name of the OpenCL command queue.
112
+ *
113
+ * \version NVTX_VERSION_1
114
+ * @{ */
115
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name);
116
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name);
117
+ /** @} */
118
+
119
+ /* ------------------------------------------------------------------------- */
120
+ /** \brief Annotates an OpenCL memory object.
121
+ *
122
+ * Allows associating an OpenCL memory object with a user-provided name.
123
+ *
124
+ * \param memobj - The handle of the OpenCL memory object to name.
125
+ * \param name - The name of the OpenCL memory object.
126
+ *
127
+ * \version NVTX_VERSION_1
128
+ * @{ */
129
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name);
130
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name);
131
+ /** @} */
132
+
133
+ /* ------------------------------------------------------------------------- */
134
+ /** \brief Annotates an OpenCL sampler.
135
+ *
136
+ * Allows associating an OpenCL sampler with a user-provided name.
137
+ *
138
+ * \param sampler - The handle of the OpenCL sampler to name.
139
+ * \param name - The name of the OpenCL sampler.
140
+ *
141
+ * \version NVTX_VERSION_1
142
+ * @{ */
143
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name);
144
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name);
145
+ /** @} */
146
+
147
+ /* ------------------------------------------------------------------------- */
148
+ /** \brief Annotates an OpenCL program.
149
+ *
150
+ * Allows associating an OpenCL program with a user-provided name.
151
+ *
152
+ * \param program - The handle of the OpenCL program to name.
153
+ * \param name - The name of the OpenCL program.
154
+ *
155
+ * \code
156
+ * cpProgram = clCreateProgramWithSource(cxGPUContext, 1,
157
+ * (const char **) &cSourceCL, &program_length, &ciErrNum);
158
+ * shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
159
+ * nvtxNameClProgramW(cpProgram, L"PROGRAM_NAME");
160
+ * \endcode
161
+ *
162
+ * \version NVTX_VERSION_1
163
+ * @{ */
164
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name);
165
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name);
166
+ /** @} */
167
+
168
+ /* ------------------------------------------------------------------------- */
169
+ /** \brief Annotates an OpenCL event.
170
+ *
171
+ * Allows associating an OpenCL event with a user-provided name.
172
+ *
173
+ * \param evnt - The handle of the OpenCL event to name.
174
+ * \param name - The name of the OpenCL event.
175
+ *
176
+ * \version NVTX_VERSION_1
177
+ * @{ */
178
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name);
179
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name);
180
+ /** @} */
181
+
182
+ /** @} */ /* END RESOURCE_NAMING */
183
+
184
+ /* ========================================================================= */
185
+ #ifdef UNICODE
186
+ #define nvtxNameClDevice nvtxNameClDeviceW
187
+ #define nvtxNameClContext nvtxNameClContextW
188
+ #define nvtxNameClCommandQueue nvtxNameClCommandQueueW
189
+ #define nvtxNameClMemObject nvtxNameClMemObjectW
190
+ #define nvtxNameClSampler nvtxNameClSamplerW
191
+ #define nvtxNameClProgram nvtxNameClProgramW
192
+ #define nvtxNameClEvent nvtxNameClEventW
193
+ #else
194
+ #define nvtxNameClDevice nvtxNameClDeviceA
195
+ #define nvtxNameClContext nvtxNameClContextA
196
+ #define nvtxNameClCommandQueue nvtxNameClCommandQueueA
197
+ #define nvtxNameClMemObject nvtxNameClMemObjectA
198
+ #define nvtxNameClSampler nvtxNameClSamplerA
199
+ #define nvtxNameClProgram nvtxNameClProgramA
200
+ #define nvtxNameClEvent nvtxNameClEventA
201
+ #endif
202
+
203
+ #ifdef __cplusplus
204
+ }
205
+ #endif /* __cplusplus */
206
+
207
+ #ifndef NVTX_NO_IMPL
208
+ #define NVTX_IMPL_GUARD_OPENCL /* Ensure other headers cannot be included directly */
209
+ #include "nvtxDetail/nvtxImplOpenCL_v3.h"
210
+ #undef NVTX_IMPL_GUARD_OPENCL
211
+ #endif /*NVTX_NO_IMPL*/
212
+
213
+ #endif /* NVTOOLSEXT_OPENCL_V3 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayload.h ADDED
@@ -0,0 +1,1478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExt.h"
32
+
33
+ /* Optionally include helper macros. */
34
+ /* #include "nvToolsExtPayloadHelper.h" */
35
+
36
+ /**
37
+ * If needed, semantic extension headers can be included after this header.
38
+ */
39
+
40
+ /**
41
+ * \brief The compatibility ID is used for versioning of this extension.
42
+ */
43
+ #ifndef NVTX_EXT_PAYLOAD_COMPATID
44
+ #define NVTX_EXT_PAYLOAD_COMPATID 0x0104
45
+ #endif
46
+
47
+ /**
48
+ * \brief The module ID identifies the payload extension. It has to be unique
49
+ * among the extension modules.
50
+ */
51
+ #ifndef NVTX_EXT_PAYLOAD_MODULEID
52
+ #define NVTX_EXT_PAYLOAD_MODULEID 2
53
+ #endif
54
+
55
+ /**
56
+ * \brief Additional value for the enum @ref nvtxPayloadType_t
57
+ */
58
+ #ifndef NVTX_PAYLOAD_TYPE_EXT
59
+ #define NVTX_PAYLOAD_TYPE_EXT (NVTX_STATIC_CAST(int32_t, 0xDFBD0009))
60
+ #endif
61
+
62
+ /** ---------------------------------------------------------------------------
63
+ * Payload schema entry flags. Used for @ref nvtxPayloadSchemaEntry_t::flags.
64
+ * ------------------------------------------------------------------------- */
65
+ #ifndef NVTX_PAYLOAD_ENTRY_FLAGS_V1
66
+ #define NVTX_PAYLOAD_ENTRY_FLAGS_V1
67
+
68
+ #define NVTX_PAYLOAD_ENTRY_FLAG_UNUSED 0
69
+
70
+ /**
71
+ * Absolute pointer into a payload (entry) of the same event.
72
+ */
73
+ #define NVTX_PAYLOAD_ENTRY_FLAG_POINTER (1 << 1)
74
+
75
+ /**
76
+ * Offset from base address of the payload.
77
+ */
78
+ #define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_BASE (1 << 2)
79
+
80
+ /**
81
+ * Offset from the end of this payload entry.
82
+ */
83
+ #define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_HERE (1 << 3)
84
+
85
+ /**
86
+ * The value is an array with fixed length, set with the field `arrayLength`.
87
+ */
88
+ #define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE (1 << 4)
89
+
90
+ /**
91
+ * The value is a zero-/null-terminated array.
92
+ */
93
+ #define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED (2 << 4)
94
+
95
+ /**
96
+ * \brief A single or multi-dimensional array of variable length.
97
+ *
98
+ * The field `arrayOrUnionDetail` contains the index of the schema entry that
99
+ * holds the length(s). If the length entry is a scalar, then this entry is a 1D
100
+ * array. If the length entry is a fixed-size array, then the number of
101
+ * dimensions is defined with the registration of the schema. If the length
102
+ * entry is a zero-terminated array, then the array of the dimensions can be
103
+ * determined at runtime.
104
+ * For multidimensional arrays, values are stored in row-major order, with rows
105
+ * being stored consecutively in contiguous memory. The size of the entry (in
106
+ * bytes) is the product of the dimensions multiplied with size of the array
107
+ * element.
108
+ */
109
+ #define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX (3 << 4)
110
+
111
+ /**
112
+ * \brief A single or multi-dimensional array of variable length, where the
113
+ * dimensions are stored in a different payload (index) of the same event.
114
+ *
115
+ * This enables an existing address to an array to be directly passed, while the
116
+ * dimensions are defined in a separate payload (with only one payload entry).
117
+ */
118
+ #define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_PAYLOAD_INDEX (4 << 4)
119
+
120
+ /**
121
+ * \brief The value or data that is pointed to by this payload entry value shall
122
+ * be copied by the NVTX handler.
123
+ *
124
+ * A tool may not support deep copy and just ignore this flag.
125
+ * See @ref NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY for more details.
126
+ */
127
+ #define NVTX_PAYLOAD_ENTRY_FLAG_DEEP_COPY (1 << 8)
128
+
129
+ /**
130
+ * Notifies the NVTX handler to hide this entry in case of visualization.
131
+ */
132
+ #define NVTX_PAYLOAD_ENTRY_FLAG_HIDE (1 << 9)
133
+
134
+ /**
135
+ * The entry specifies the event message. Any string type can be used.
136
+ */
137
+ #define NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE (1 << 10)
138
+
139
+ /**
140
+ * \brief The entry contains a timestamp.
141
+ *
142
+ * The time source might be provided via the entry semantics field. In most
143
+ * cases, the timestamp (entry) type is @ref NVTX_PAYLOAD_ENTRY_TYPE_INT64.
144
+ */
145
+ #define NVTX_PAYLOAD_ENTRY_FLAG_TIMESTAMP (2 << 10)
146
+
147
+ /**
148
+ * These flags specify the NVTX event type to which an entry refers.
149
+ */
150
+ #define NVTX_PAYLOAD_ENTRY_FLAG_RANGE_BEGIN (1 << 12)
151
+ #define NVTX_PAYLOAD_ENTRY_FLAG_RANGE_END (2 << 12)
152
+ #define NVTX_PAYLOAD_ENTRY_FLAG_MARK (3 << 12)
153
+ #define NVTX_PAYLOAD_ENTRY_FLAG_COUNTER (4 << 12)
154
+
155
+ #endif /* NVTX_PAYLOAD_ENTRY_FLAGS_V1 */
156
+ /** ---------------------------------------------------------------------------
157
+ * END: Payload schema entry flags.
158
+ * ------------------------------------------------------------------------- */
159
+
160
+ /**
161
+ * @note The 'array' flags assume that the array is embedded. Otherwise,
162
+ * @ref NVTX_PAYLOAD_ENTRY_FLAG_POINTER has to be additionally specified. Some
163
+ * combinations may be invalid based on the `NVTX_PAYLOAD_SCHEMA_TYPE_*` this
164
+ * entry is enclosed. For instance, variable length embedded arrays are valid
165
+ * within @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC but invalid with
166
+ * @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC. See `NVTX_PAYLOAD_SCHEMA_TYPE_*` for
167
+ * additional details.
168
+ */
169
+
170
+ /* Helper macro to check if an entry represents an array. */
171
+ #define NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY (\
172
+ NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE | \
173
+ NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED | \
174
+ NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX)
175
+
176
+ #define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_TYPE(F) \
177
+ ((F) & NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY)
178
+
179
+
180
+ /** ---------------------------------------------------------------------------
181
+ * Types of entries in a payload schema.
182
+ *
183
+ * @note Several of the predefined types contain the size (in bits) in their
184
+ * names. For some data types the size (in bytes) is not fixed and may differ
185
+ * for different platforms/operating systems/compilers. To provide portability,
186
+ * an array of sizes (in bytes) for type 1 to 28 ( @ref
187
+ * NVTX_PAYLOAD_ENTRY_TYPE_CHAR to @ref NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE)
188
+ * is passed to the NVTX extension initialization function
189
+ * @ref InitializeInjectionNvtxExtension via the `extInfo` field of
190
+ * @ref nvtxExtModuleInfo_t.
191
+ * ------------------------------------------------------------------------- */
192
+ #ifndef NVTX_PAYLOAD_ENTRY_TYPES_V1
193
+ #define NVTX_PAYLOAD_ENTRY_TYPES_V1
194
+
195
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INVALID 0
196
+
197
+ /**
198
+ * Basic integer types.
199
+ */
200
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CHAR 1
201
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UCHAR 2
202
+ #define NVTX_PAYLOAD_ENTRY_TYPE_SHORT 3
203
+ #define NVTX_PAYLOAD_ENTRY_TYPE_USHORT 4
204
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INT 5
205
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UINT 6
206
+ #define NVTX_PAYLOAD_ENTRY_TYPE_LONG 7
207
+ #define NVTX_PAYLOAD_ENTRY_TYPE_ULONG 8
208
+ #define NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG 9
209
+ #define NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG 10
210
+
211
+ /**
212
+ * Integer types with explicit size.
213
+ */
214
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INT8 11
215
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UINT8 12
216
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INT16 13
217
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UINT16 14
218
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INT32 15
219
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UINT32 16
220
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INT64 17
221
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UINT64 18
222
+
223
+ /**
224
+ * Floating point types
225
+ */
226
+ #define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT 19
227
+ #define NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE 20
228
+ #define NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE 21
229
+
230
+ /**
231
+ * Size type (`size_t` in C).
232
+ */
233
+ #define NVTX_PAYLOAD_ENTRY_TYPE_SIZE 22
234
+
235
+ /**
236
+ * Any address, e.g. `void*`. If the pointer type matters, use the flag @ref
237
+ * NVTX_PAYLOAD_ENTRY_FLAG_POINTER and the respective type instead.
238
+ */
239
+ #define NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS 23
240
+
241
+ /**
242
+ * Special character types.
243
+ */
244
+ #define NVTX_PAYLOAD_ENTRY_TYPE_WCHAR 24 /* wide character (since C90) */
245
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 25 /* since C2x and C++20 */
246
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 26
247
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 27
248
+
249
+ /**
250
+ * There is type size and alignment information for all previous types.
251
+ */
252
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE (NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 + 1)
253
+
254
+ /**
255
+ * Store raw 8-bit binary data. As with `char`, 1-byte alignment is assumed.
256
+ * Typically, a tool will display this as hex or binary.
257
+ */
258
+ #define NVTX_PAYLOAD_ENTRY_TYPE_BYTE 32
259
+
260
+ /**
261
+ * These types do not have standardized equivalents. It is assumed that the
262
+ * number at the end corresponds to the bits used to store the value and that
263
+ * the alignment corresponds to standardized types of the same size.
264
+ * A tool may not support these types.
265
+ */
266
+ #define NVTX_PAYLOAD_ENTRY_TYPE_INT128 33
267
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UINT128 34
268
+
269
+ #define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT16 42
270
+ #define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT32 43
271
+ #define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT64 44
272
+ #define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT128 45
273
+
274
+ #define NVTX_PAYLOAD_ENTRY_TYPE_BF16 50
275
+ #define NVTX_PAYLOAD_ENTRY_TYPE_TF32 52
276
+
277
+ /**
278
+ * Data types are as defined by NVTXv3 core.
279
+ */
280
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CATEGORY 68 /* uint32_t */
281
+ #define NVTX_PAYLOAD_ENTRY_TYPE_COLOR_ARGB 69 /* uint32_t */
282
+
283
+ /**
284
+ * The scope of events or counters (see `nvtxScopeRegister`).
285
+ */
286
+ #define NVTX_PAYLOAD_ENTRY_TYPE_SCOPE_ID 70 /* uint64_t */
287
+
288
+ /**
289
+ * Process ID as scope.
290
+ */
291
+ #define NVTX_PAYLOAD_ENTRY_TYPE_PID_UINT32 71
292
+ #define NVTX_PAYLOAD_ENTRY_TYPE_PID_UINT64 72
293
+
294
+ /**
295
+ * Thread ID as scope.
296
+ */
297
+ #define NVTX_PAYLOAD_ENTRY_TYPE_TID_UINT32 73
298
+ #define NVTX_PAYLOAD_ENTRY_TYPE_TID_UINT64 74
299
+
300
+ /**
301
+ * \brief String types.
302
+ *
303
+ * If no flags are set for the entry and `arrayOrUnionDetail > 0`, the entry is
304
+ * assumed to be a fixed-size string with the given length, embedded in the payload.
305
+ * `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE` is redundant for fixed-size strings.
306
+ *
307
+ * Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED` specifies a
308
+ * zero-terminated string. If `arrayOrUnionDetail > 0`, the entry is handled as
309
+ * a zero-terminated array of fixed-size strings.
310
+ *
311
+ * Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX` specifies a
312
+ * variable-length string with the length given in the entry specified by the
313
+ * field `arrayOrUnionDetail`.
314
+ */
315
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING 75 /* `char*`, system LOCALE */
316
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF8 76
317
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF16 77
318
+ #define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF32 78
319
+
320
+ /**
321
+ * The entry value is of type @ref nvtxStringHandle_t returned by
322
+ * @ref nvtxDomainRegisterString.
323
+ */
324
+ #define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE 80
325
+
326
+ /**
327
+ * This type marks the union selector member (entry index) in schemas used by
328
+ * a union with internal selector.
329
+ * See @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR.
330
+ */
331
+ #define NVTX_PAYLOAD_ENTRY_TYPE_UNION_SELECTOR 100
332
+
333
+ /**
334
+ * \brief Predefined schema ID for payload data that is referenced in another payload.
335
+ *
336
+ * This schema ID can be used in @ref nvtxPayloadData_t::schema_id to indicate that the
337
+ * payload is a blob of memory which other payload entries may point into.
338
+ * A tool will not expose this payload directly.
339
+ *
340
+ * This schema ID cannot be used as schema entry type!
341
+ */
342
+ #define NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED 1022
343
+
344
+ /**
345
+ * \brief Predefined schema ID for raw payload data.
346
+ *
347
+ * This schema ID can be used in @ref nvtxPayloadData_t::schema_id to indicate
348
+ * that the payload is a blob, which can be shown with an arbitrary data viewer.
349
+ * This schema ID cannot be used as schema entry type!
350
+ */
351
+ #define NVTX_TYPE_PAYLOAD_SCHEMA_RAW 1023
352
+
353
+ /* Custom (static) schema IDs. */
354
+ #define NVTX_PAYLOAD_SCHEMA_ID_STATIC_START (1 << 24)
355
+
356
+ /* Dynamic schema IDs (generated by the tool) start here. */
357
+ #define NVTX_PAYLOAD_SCHEMA_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
358
+
359
+ #endif /* NVTX_PAYLOAD_ENTRY_TYPES_V1 */
360
+ /** ---------------------------------------------------------------------------
361
+ * END: Payload schema entry types.
362
+ * ------------------------------------------------------------------------- */
363
+
364
+
365
+ #ifndef NVTX_PAYLOAD_SCHEMA_TYPES_V1
366
+ #define NVTX_PAYLOAD_SCHEMA_TYPES_V1
367
+
368
+ /**
369
+ * \brief The payload schema type.
370
+ *
371
+ * A schema can be either of the following types. It is set with
372
+ * @ref nvtxPayloadSchemaAttr_t::type.
373
+ */
374
+ #define NVTX_PAYLOAD_SCHEMA_TYPE_INVALID 0
375
+ #define NVTX_PAYLOAD_SCHEMA_TYPE_STATIC 1
376
+ #define NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC 2
377
+ #define NVTX_PAYLOAD_SCHEMA_TYPE_UNION 3
378
+ #define NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR 4
379
+
380
+ #endif /* NVTX_PAYLOAD_SCHEMA_TYPES_V1 */
381
+
382
+
383
+ #ifndef NVTX_PAYLOAD_SCHEMA_FLAGS_V1
384
+ #define NVTX_PAYLOAD_SCHEMA_FLAGS_V1
385
+
386
+ /**
387
+ * \brief Flags for static and dynamic schemas.
388
+ *
389
+ * The schema flags are used with @ref nvtxPayloadSchemaAttr_t::flags.
390
+ */
391
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_NONE 0
392
+
393
+ /**
394
+ * This flag indicates that a schema and the corresponding payloads can
395
+ * contain fields which require a deep copy.
396
+ */
397
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY (1 << 1)
398
+
399
+ /**
400
+ * This flag indicates that a schema and the corresponding payload can be
401
+ * referenced by another payload of the same event. If the schema is not
402
+ * intended to be visualized directly, it is possible to use
403
+ * @ref NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED instead.
404
+ */
405
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_REFERENCED (1 << 2)
406
+
407
+ /**
408
+ * The schema defines a counter group. An NVTX handler can expect that the schema
409
+ * contains entries with counter semantics.
410
+ */
411
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_COUNTER_GROUP (1 << 3)
412
+
413
+ /**
414
+ * The schema defines a range or marker. An NVTX handler can expect that the
415
+ * schema contains a message and timestamp(s).
416
+ */
417
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_RANGE_PUSHPOP (2 << 3)
418
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_RANGE_STARTEND (3 << 3)
419
+ #define NVTX_PAYLOAD_SCHEMA_FLAG_MARK (4 << 3)
420
+
421
+ #endif /* NVTX_PAYLOAD_SCHEMA_FLAGS_V1 */
422
+
423
+
424
+ #ifndef NVTX_PAYLOAD_SCHEMA_ATTR_FIELDS_V1
425
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELDS_V1
426
+
427
+ /**
428
+ * The values allow the valid fields in @ref nvtxPayloadSchemaAttr_t to be
429
+ * specified via setting the field `fieldMask`.
430
+ */
431
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME (1 << 1)
432
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE (1 << 2)
433
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS (1 << 3)
434
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES (1 << 4)
435
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES (1 << 5)
436
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_STATIC_SIZE (1 << 6)
437
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ALIGNMENT (1 << 7)
438
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_SCHEMA_ID (1 << 8)
439
+ #define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_EXTENSION (1 << 9)
440
+
441
+ #endif /* NVTX_PAYLOAD_SCHEMA_ATTR_FIELDS_V1 */
442
+
443
+
444
+ #ifndef NVTX_PAYLOAD_ENUM_ATTR_FIELDS_V1
445
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELDS_V1
446
+
447
+ /**
448
+ * The values are used to set the field `fieldMask` and specify which fields in
449
+ * @ref nvtxPayloadEnumAttr_t are set.
450
+ */
451
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELD_NAME (1 << 1)
452
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELD_ENTRIES (1 << 2)
453
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELD_NUM_ENTRIES (1 << 3)
454
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELD_SIZE (1 << 4)
455
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELD_SCHEMA_ID (1 << 5)
456
+ #define NVTX_PAYLOAD_ENUM_ATTR_FIELD_EXTENSION (1 << 6)
457
+
458
+ #endif /* NVTX_PAYLOAD_ENUM_ATTR_FIELDS_V1 */
459
+
460
+ /**
461
+ * An NVTX scope specifies the execution scope or source of events or counters.
462
+ * A tool determines the value for a predefined scope when the sample is taken.
463
+ */
464
+ #ifndef NVTX_SCOPES_V1
465
+ #define NVTX_SCOPES_V1
466
+
467
+ #define NVTX_SCOPE_NONE 0 /* No scope specified. */
468
+ #define NVTX_SCOPE_ROOT 1 /* The root in a hierarchy. */
469
+
470
+ /* Hardware events */
471
+ #define NVTX_SCOPE_CURRENT_HW_MACHINE 2 /* Node/machine name */
472
+ #define NVTX_SCOPE_CURRENT_HW_SOCKET 3
473
+ #define NVTX_SCOPE_CURRENT_HW_CPU_PHYSICAL 4 /* Physical CPU core */
474
+ #define NVTX_SCOPE_CURRENT_HW_CPU_LOGICAL 5 /* Logical CPU core */
475
+ /* Innermost HW execution context */
476
+ #define NVTX_SCOPE_CURRENT_HW_INNERMOST 15
477
+
478
+ /* Virtualized hardware, virtual machines */
479
+ #define NVTX_SCOPE_CURRENT_HYPERVISOR 16
480
+ #define NVTX_SCOPE_CURRENT_VM 17
481
+ #define NVTX_SCOPE_CURRENT_KERNEL 18
482
+ #define NVTX_SCOPE_CURRENT_CONTAINER 19
483
+ #define NVTX_SCOPE_CURRENT_OS 20
484
+
485
+ /* Software scopes */
486
+ #define NVTX_SCOPE_CURRENT_SW_PROCESS 21 /* Process scope */
487
+ #define NVTX_SCOPE_CURRENT_SW_THREAD 22 /* Thread scope */
488
+ /* Innermost SW execution context */
489
+ #define NVTX_SCOPE_CURRENT_SW_INNERMOST 31
490
+
491
+ /** Static (user-provided) scope IDs (feed forward) */
492
+ #define NVTX_SCOPE_ID_STATIC_START (1 << 24)
493
+
494
+ /* Dynamically (tool) generated scope IDs */
495
+ #define NVTX_SCOPE_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
496
+
497
+ #endif /* NVTX_SCOPES_V1 */
498
+
499
+ #ifndef NVTX_TIME_V1
500
+ #define NVTX_TIME_V1
501
+
502
+ /**
503
+ * Timestamp source is not known, e.g. NIC or switch. The NVTX handler can
504
+ * assume that at least two synchronization points are created with NVTX
505
+ * instrumentation.
506
+ */
507
+ #define NVTX_TIMESTAMP_TYPE_NONE 0
508
+
509
+ /** The timestamp was provided by the NVTX handler via `nvtxTimestampGet()`. */
510
+ #define NVTX_TIMESTAMP_TYPE_TOOL_PROVIDED 1
511
+
512
+ /** CPU timestamp sources */
513
+ #define NVTX_TIMESTAMP_TYPE_CPU_TSC /* RDTSC on x86, CNTVCT on ARM */ 10
514
+ #define NVTX_TIMESTAMP_TYPE_CPU_TSC_NONVIRTUALIZED /* CNTPCT on ARM */ 11
515
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_REALTIME 12
516
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_REALTIME_COARSE 13
517
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_MONOTONIC 14
518
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_MONOTONIC_RAW 15
519
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_MONOTONIC_COARSE 16
520
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_BOOTTIME 17
521
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_PROCESS_CPUTIME_ID 18
522
+ #define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_THREAD_CPUTIME_ID 19
523
+
524
+ #define NVTX_TIMESTAMP_TYPE_WIN_QPC 30
525
+ #define NVTX_TIMESTAMP_TYPE_WIN_GSTAFT 31
526
+ #define NVTX_TIMESTAMP_TYPE_WIN_GSTAFTP 32
527
+
528
+ #define NVTX_TIMESTAMP_TYPE_C_TIME 40
529
+ #define NVTX_TIMESTAMP_TYPE_C_CLOCK 41
530
+ #define NVTX_TIMESTAMP_TYPE_C_TIMESPEC_GET 42
531
+
532
+ #define NVTX_TIMESTAMP_TYPE_CPP_STEADY_CLOCK 50
533
+ #define NVTX_TIMESTAMP_TYPE_CPP_HIGH_RESOLUTION_CLOCK 51
534
+ #define NVTX_TIMESTAMP_TYPE_CPP_SYSTEM_CLOCK 52
535
+ #define NVTX_TIMESTAMP_TYPE_CPP_UTC_CLOCK 53
536
+ #define NVTX_TIMESTAMP_TYPE_CPP_TAI_CLOCK 54
537
+ #define NVTX_TIMESTAMP_TYPE_CPP_GPS_CLOCK 55
538
+ #define NVTX_TIMESTAMP_TYPE_CPP_FILE_CLOCK 56
539
+
540
+ /** GPU timestamp sources */
541
+ #define NVTX_TIMESTAMP_TYPE_GPU_GLOBALTIMER 80 /* e.g. PTIMER */
542
+
543
+ /** Returned by `nvtxTimeDomainRegister` if time domain registration failed. */
544
+ #define NVTX_TIME_DOMAIN_ID_NONE 0
545
+
546
+ /** Static (user-provided) time domain IDs (feed forward) */
547
+ #define NVTX_TIME_DOMAIN_ID_STATIC_START (1 << 24)
548
+
549
+ /* Dynamically (tool) generated time domain IDs */
550
+ #define NVTX_TIME_DOMAIN_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
551
+
552
+ /** Timer properties */
553
+ #define NVTX_TIMER_FLAG_NONE 0
554
+ #define NVTX_TIMER_FLAG_CLOCK_MONOTONIC (1 << 1)
555
+ #define NVTX_TIMER_FLAG_CLOCK_STEADY (1 << 2)
556
+
557
+ /** Point in time when the timer starts (its value is 0). */
558
+ #define NVTX_TIMER_START_UNKNOWN 0
559
+ #define NVTX_TIMER_START_SYSTEM_BOOT 1
560
+ #define NVTX_TIMER_START_VM_BOOT 2
561
+ #define NVTX_TIMER_START_UNIX_EPOCH 3 /* 1 January 1970 */
562
+ #define NVTX_TIMER_START_WIN_FILETIME 4 /* 1 January 1601 */
563
+
564
+ /**
565
+ * Flags specifying whether it is safe or unsafe to call the timestamp
566
+ * provider after process teardown.
567
+ */
568
+ #define NVTX_TIMER_SOURCE_SAFE_CALL_AFTER_PROCESS_TEARDOWN 0
569
+ #define NVTX_TIMER_SOURCE_UNSAFE_CALL_AFTER_PROCESS_TEARDOWN 1
570
+
571
+ #endif /* NVTX_TIME_V1 */
572
+
573
+ #ifndef NVTX_BATCH_FLAGS_V1
574
+ #define NVTX_BATCH_FLAGS_V1
575
+
576
+ /**
577
+ * Timestamp ordering flags for a batch of deferred events or counters.
578
+ * By default, chronological order by the first timestamp of the event or
579
+ * counter is assumed.
580
+ */
581
+ #define NVTX_BATCH_FLAG_TIME_SORTED 0
582
+ #define NVTX_BATCH_FLAG_TIME_SORTED_PARTIALLY (1 << 1)
583
+ #define NVTX_BATCH_FLAG_TIME_SORTED_PER_SCOPE (2 << 1)
584
+ #define NVTX_BATCH_FLAG_UNSORTED (3 << 1)
585
+
586
+ #endif /* NVTX_BATCH_FLAGS_V1 */
587
+
588
+ #ifdef __cplusplus
589
+ extern "C" {
590
+ #endif /* __cplusplus */
591
+
592
+ #ifndef NVTX_PAYLOAD_TYPEDEFS_V1
593
+ #define NVTX_PAYLOAD_TYPEDEFS_V1
594
+
595
+ /**
596
+ * \brief Size and alignment information for predefined payload entry types.
597
+ *
598
+ * The struct contains the size and the alignment size in bytes. A respective
599
+ * array for the predefined types is passed via nvtxExtModuleInfo_t to the NVTX
600
+ * client/handler. The type (ID) is used as index into this array.
601
+ */
602
+ typedef struct nvtxPayloadEntryTypeInfo_v1
603
+ {
604
+ uint16_t size;
605
+ uint16_t align;
606
+ } nvtxPayloadEntryTypeInfo_t;
607
+
608
+ /**
609
+ * \brief Binary payload data, size and decoding information.
610
+ *
611
+ * An array of type `nvtxPayloadData_t` is attached to an NVTX event via the
611
+ * `payload.ullValue` field of the NVTX event attributes.
613
+ *
614
+ * The `schemaId` can be a predefined schema entry type (`NVTX_PAYLOAD_ENTRY_TYPE*`),
615
+ * a schema ID (statically specified or dynamically created) or one of
616
+ * `NVTX_PAYLOAD_TYPE_REFERENCED` or `NVTX_PAYLOAD_TYPE_RAW`.
617
+ *
618
+ * Setting the size of a payload to `SIZE_MAX` can be useful to reduce the
619
+ * overhead of NVTX instrumentation, when no NVTX handler is attached. However,
620
+ * a tool might not be able to detect the size of a payload and thus skip it.
621
+ * A reasonable use case is a payload that represents a null-terminated
622
+ * C string, where the NVTX handler can call `strlen()`.
623
+ */
624
+ typedef struct nvtxPayloadData_v1
625
+ {
626
+ /**
627
+ * The schema ID, which defines the layout of the binary data.
628
+ */
629
+ uint64_t schemaId;
630
+
631
+ /**
632
+ * Size of the payload (blob) in bytes. `SIZE_MAX` (`-1`) indicates to the tool
633
+ * that it should figure out the size, which might not be possible.
634
+ */
635
+ size_t size;
636
+
637
+ /**
638
+ * Pointer to the binary payload data.
639
+ */
640
+ const void* payload;
641
+ } nvtxPayloadData_t;
642
+
643
+
644
+ /**
645
+ * \brief Header of the payload entry's semantic field.
646
+ *
647
+ * If the semantic field of the payload schema entry is set, the first four
648
+ * fields (header) are defined with this type. A tool can iterate through the
649
+ * extensions and check, if it supports (can handle) it.
650
+ */
651
+ typedef struct nvtxSemanticsHeader_v1
652
+ {
653
+ uint32_t structSize; /** Size of semantic extension struct. */
654
+ uint16_t semanticId;
655
+ uint16_t version;
656
+ const struct nvtxSemanticsHeader_v1* next; /** linked list */
657
+ /* Additional fields are defined by the specific semantic extension. */
658
+ } nvtxSemanticsHeader_t;
659
+
660
+ /**
661
+ * \brief Entry in a schema.
662
+ *
663
+ * A payload schema consists of an array of payload schema entries. It is
664
+ * registered with @ref nvtxPayloadSchemaRegister. `flag` can be set to `0` for
665
+ * simple values, 'type' is the only "required" field. If not set explicitly,
666
+ * all other fields are zero-initialized, which means that the entry has no name
667
+ * and the offset is determined based on self-alignment rules.
668
+ *
669
+ * Example schema:
670
+ * nvtxPayloadSchemaEntry_t schema[] = {
671
+ * {0, NVTX_EXT_PAYLOAD_TYPE_UINT8, "one byte"},
672
+ * {0, NVTX_EXT_PAYLOAD_TYPE_INT32, "four bytes"}
673
+ * };
674
+ */
675
+ typedef struct nvtxPayloadSchemaEntry_v1
676
+ {
677
+ /**
678
+ * \brief Flags to augment the basic type.
679
+ *
680
+ * This field allows additional properties of the payload entry to be
681
+ * specified. Valid values are `NVTX_PAYLOAD_ENTRY_FLAG_*`.
682
+ */
683
+ uint64_t flags;
684
+
685
+ /**
686
+ * \brief Predefined payload schema entry type or custom schema ID.
687
+ *
688
+ * Predefined types are `NVTX_PAYLOAD_ENTRY_TYPE_*`. Passing a schema ID
689
+ * enables nesting of schemas.
690
+ */
691
+ uint64_t type;
692
+
693
+ /**
694
+ * \brief Name or label of the payload entry. (Optional)
695
+ *
696
+ * A meaningful name or label can help organizing and interpreting the data.
697
+ */
698
+ const char* name;
699
+
700
+ /**
701
+ * \brief Description of the payload entry. (Optional)
702
+ *
703
+ * A more detailed description of the data that is stored with this entry.
704
+ */
705
+ const char* description;
706
+
707
+ /**
708
+ * \brief String length, array length or member selector for union types.
709
+ *
710
+ * If @ref type is a C string type, this field specifies the string length.
711
+ *
712
+ * If @ref flags specify that the entry is an array, this field specifies
713
+ * the array length. See `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_*` for more details.
714
+ *
715
+ * If @ref type is a union with schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION
716
+ * (external selection of the union member), this field contains the index
717
+ * (starting with 0) to an entry of integral type in the same schema. The
718
+ * associated field value specifies the selected union member.
719
+ *
720
+ * @note An array of schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION is not
721
+ * supported. @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR can
722
+ * be used instead.
723
+ */
724
+ uint64_t arrayOrUnionDetail;
725
+
726
+ /**
727
+ * \brief Offset in the binary payload data (in bytes).
728
+ *
729
+ * This field specifies the byte offset from the base address of the actual
730
+ * binary data (blob) to the start address of the data of this entry.
731
+ *
732
+ * It is recommended (but not required) to provide the offset. Otherwise,
733
+ * the NVTX handler will determine the offset from natural alignment rules.
734
+ * In some cases, e.g. dynamic schema layouts, the offset cannot be set and
735
+ * has to be determined based on the data of prior entries.
736
+ *
737
+ * Setting the offset can also be used to skip entries during payload parsing.
738
+ */
739
+ uint64_t offset;
740
+
741
+ /**
742
+ * \brief Additional semantics of the payload entry.
743
+ *
744
+ * The field points to the first element in a linked list, which enables
745
+ * multiple semantic extensions.
746
+ */
747
+ const nvtxSemanticsHeader_t* semantics;
748
+
749
+ /**
750
+ * \brief Reserved for future use. Do not use it!
751
+ */
752
+ const void* reserved;
753
+ } nvtxPayloadSchemaEntry_t;
754
+
755
+ /**
756
+ * \brief NVTX payload schema attributes.
757
+ */
758
+ typedef struct nvtxPayloadSchemaAttr_v1
759
+ {
760
+ /**
761
+ * \brief Mask of valid fields in this struct.
762
+ *
763
+ * Use the `NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_*` defines.
764
+ */
765
+ uint64_t fieldMask;
766
+
767
+ /**
768
+ * \brief Name of the payload schema. (Optional)
769
+ */
770
+ const char* name;
771
+
772
+ /**
773
+ * \brief Payload schema type. (Mandatory) \anchor PAYLOAD_TYPE_FIELD
774
+ *
775
+ * Use the `NVTX_PAYLOAD_SCHEMA_TYPE_*` defines.
776
+ */
777
+ uint64_t type;
778
+
779
+ /**
780
+ * \brief Payload schema flags. (Optional)
781
+ *
782
+ * Flags defined by `NVTX_PAYLOAD_SCHEMA_FLAG_*` can be used to set
783
+ * additional properties of the schema.
784
+ */
785
+ uint64_t flags;
786
+
787
+ /**
788
+ * \brief Entries of a payload schema. (Mandatory) \anchor ENTRIES_FIELD
789
+ *
790
+ * This field is a pointer to an array of schema entries, each describing a
791
+ * field in a data structure, e.g. in a C struct or union.
792
+ */
793
+ const nvtxPayloadSchemaEntry_t* entries;
794
+
795
+ /**
796
+ * \brief Number of entries in the payload schema. (Mandatory)
797
+ *
798
+ * Number of entries in the array of payload entries \ref ENTRIES_FIELD.
799
+ */
800
+ size_t numEntries;
801
+
802
+ /**
803
+ * \brief The binary payload size in bytes for static payload schemas.
804
+ *
805
+ * If \ref PAYLOAD_TYPE_FIELD is @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC this
806
+ * value is ignored. If this field is not specified for a schema of type
807
+ * @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC, the size can be automatically
808
+ * determined by a tool.
809
+ */
810
+ size_t payloadStaticSize;
811
+
812
+ /**
813
+ * \brief The byte alignment for packed structures.
814
+ *
815
+ * If not specified, this field defaults to `0`, which means that the fields
816
+ * in the data structure are not packed and natural alignment rules can be
817
+ * applied.
818
+ */
819
+ size_t packAlign;
820
+
821
+ /**
822
+ * A static payload schema ID must be unique within the domain,
823
+ * >= NVTX_PAYLOAD_SCHEMA_ID_STATIC_START and
824
+ * < NVTX_PAYLOAD_SCHEMA_ID_DYNAMIC_START
825
+ */
826
+ uint64_t schemaId;
827
+
828
+ /**
829
+ * Flexible extension for schema attributes.
830
+ * (Do not use. Reserved for future use.)
831
+ */
832
+ void* extension;
833
+ } nvtxPayloadSchemaAttr_t;
834
+
835
+ /**
836
+ * \brief This type is used to describe an enumeration.
837
+ *
838
+ * Since the value of an enum entry might not be meaningful for the analysis
839
+ * and/or visualization, a tool can show the name of the enum entry instead.
840
+ *
841
+ * An array of this struct is passed to @ref nvtxPayloadEnumAttr_t::entries to be
842
+ * finally registered via @ref nvtxPayloadEnumRegister with the NVTX handler.
843
+ *
844
+ * @note EXPERIMENTAL
845
+ */
846
+ typedef struct nvtxPayloadEnum_v1
847
+ {
848
+ /**
849
+ * Name of the enum value.
850
+ */
851
+ const char* name;
852
+
853
+ /**
854
+ * Value of the enum entry.
855
+ */
856
+ uint64_t value;
857
+
858
+ /**
859
+ * Indicates that this entry sets a specific set of bits, which can be used
860
+ * to define bitsets.
861
+ */
862
+ int8_t isFlag;
863
+ } nvtxPayloadEnum_t;
864
+
865
+ /**
866
+ * \brief NVTX payload enumeration type attributes.
867
+ *
868
+ * A pointer to this struct is passed to @ref nvtxPayloadEnumRegister.
869
+ */
870
+ typedef struct nvtxPayloadEnumAttr_v1
871
+ {
872
+ /**
873
+ * Mask of valid fields in this struct. See `NVTX_PAYLOAD_ENUM_ATTR_FIELD_*`.
874
+ */
875
+ uint64_t fieldMask;
876
+
877
+ /**
878
+ * Name of the enum. (Optional)
879
+ */
880
+ const char* name;
881
+
882
+ /**
883
+ * Entries of the enum. (Mandatory)
884
+ */
885
+ const nvtxPayloadEnum_t* entries;
886
+
887
+ /**
888
+ * Number of entries in the enum. (Mandatory)
889
+ */
890
+ size_t numEntries;
891
+
892
+ /**
893
+ * Size of enumeration type in bytes
894
+ */
895
+ size_t sizeOfEnum;
896
+
897
+ /**
898
+ * A static payload schema ID must be unique within the domain,
899
+ * >= NVTX_PAYLOAD_SCHEMA_ID_STATIC_START and
900
+ * < NVTX_PAYLOAD_SCHEMA_ID_DYNAMIC_START
901
+ */
902
+ uint64_t schemaId;
903
+
904
+ /**
905
+ * Flexible extension for enumeration attributes.
906
+ * (Do not use. Reserved for future use.)
907
+ */
908
+ void* extension;
909
+ } nvtxPayloadEnumAttr_t;
910
+
911
+ typedef struct nvtxScopeAttr_v1
912
+ {
913
+ size_t structSize;
914
+
915
+ /**
916
+ * Path delimited by '/' characters, relative to @ref parentScope. Leading
917
+ * slashes are ignored. Nodes in the path may use name[key] syntax to
918
+ * indicate an array of sibling nodes, which may be combined with other
919
+ * non-array nodes or different arrays at the same scope. Node names should
920
+ * be UTF8 printable characters. '\' has to be used to escape '/', '[', and
921
+ * ']' characters in node names. An empty C string "" and `NULL` are valid
922
+ * inputs and treated equivalently.
923
+ *
924
+ * A GPU can be specified using its:
925
+ * - Unique identifier (UUID) with "GPU[UUID:#]",
926
+ * - CUDA device ID (sensitive to CUDA_VISIBLE_DEVICES) with "GPU[CUDAID:#]",
927
+ * - NVML (nvidia-smi) device ID with "GPU[NVSMI:#]"
928
+ *
929
+ * (replace `#` with the actual device ID).
930
+ * For display purposes, a tool is recommended to show a pretty name.
931
+ * To clearly identify a GPU, the @ref parentScope should also match
932
+ * the GPU's execution context.
933
+ */
934
+ const char* path;
935
+
936
+ /** Identifier of the parent scope, to which `path` is appended. */
937
+ uint64_t parentScope;
938
+
939
+ /**
940
+ * Static scope ID. Must be unique within the domain,
941
+ * >= NVTX_SCOPE_ID_STATIC_START, and < NVTX_SCOPE_ID_DYNAMIC_START.
942
+ * Use NVTX_SCOPE_NONE to let the tool create a (dynamic) scope ID.
943
+ */
944
+ uint64_t scopeId;
945
+ } nvtxScopeAttr_t;
946
+
947
+ #endif /* NVTX_PAYLOAD_TYPEDEFS_V1 */
948
+
949
+ #ifndef NVTX_PAYLOAD_TYPEDEFS_DEFERRED_V1
950
+ #define NVTX_PAYLOAD_TYPEDEFS_DEFERRED_V1
951
+
952
+ /** Attributes of an NVTX time domain. */
953
+ typedef struct nvtxTimeDomainAttr_v1
954
+ {
955
+ /** Identifier of the NVTX scope the time domain is associated with. */
956
+ uint64_t scopeId;
957
+
958
+ /** Predefined `NVTX_TIMESTAMP_TYPE_*`. */
959
+ uint64_t timestampTypeId;
960
+
961
+ /**
962
+ * Static (feed-forward) time domain ID. `0` makes the tool generate the ID.
963
+ * The static time domain ID must be >= NVTX_TIME_DOMAIN_ID_STATIC_START and
964
+ * < NVTX_TIME_DOMAIN_ID_DYNAMIC_START
965
+ */
966
+ uint64_t timeDomainId;
967
+
968
+ /** Properties of the timer (use NVTX_TIMER_FLAG_*). */
969
+ uint64_t timerFlags;
970
+
971
+ /** Ticks per second (0 means unknown). */
972
+ int64_t timerResolution;
973
+
974
+ /** Point in time when the timer starts (use NVTX_TIMER_START_*). */
975
+ uint64_t timerStart;
976
+ } nvtxTimeDomainAttr_t;
977
+
978
+ /** Synchronization point between two time domains. */
979
+ typedef struct nvtxSyncPoint_v1
980
+ {
981
+ int64_t src;
982
+ int64_t dst;
983
+ } nvtxSyncPoint_t;
984
+
985
+ /**
986
+ * \brief Helper struct to submit a batch of events (marks or ranges).
987
+ *
988
+ * By default, events are assumed to be chronologically sorted by the first
989
+ * timestamp in the event (start time in a range). If the events are not sorted,
990
+ * the `flags` field must be set accordingly (see `NVTX_BATCH_FLAG_*`).
991
+ */
992
+ typedef struct nvtxEventBatch_v1
993
+ {
994
+ /**
995
+ * Identifier of the data layout of a deferred event in the array of events.
996
+ * Only layouts with static payload size are allowed. The size of an event
997
+ * in the array is specified by the static payload size during the schema
998
+ * registration. The time domain of event timestamps is provided via time
999
+ * semantics in the schema registration.
1000
+ */
1001
+ uint64_t eventSchemaId;
1002
+
1003
+ /** Size of the array of deferred events (in bytes). */
1004
+ size_t size;
1005
+
1006
+ /** Pointer to the array of deferred events. */
1007
+ const void* events;
1008
+
1009
+ /** Scope of all events or counters in the batch. */
1010
+ uint64_t scope;
1011
+
1012
+ /** Timestamp ordering (sorted, partially sorted, unsorted), etc. */
1013
+ uint64_t flags;
1014
+
1015
+ /** Flexible data which can be referenced by events in the batch. */
1016
+ const void* flexData;
1017
+
1018
+ /** Size of the flexible data memory blob. */
1019
+ size_t flexDataSize;
1020
+
1021
+ /**
1022
+ * Offset from the `flexData` pointer to the beginning of the flexible data
1023
+ * in bytes.
1024
+ */
1025
+ size_t flexDataOffset;
1026
+ } nvtxEventBatch_t;
1027
+
1028
+ #endif /* NVTX_PAYLOAD_TYPEDEFS_DEFERRED_V1 */
1029
+
1030
+ #ifndef NVTX_PAYLOAD_API_FUNCTIONS_V1
1031
+ #define NVTX_PAYLOAD_API_FUNCTIONS_V1
1032
+
1033
+ /**
1034
+ * \brief Register a payload schema.
1035
+ *
1036
+ * @param domain NVTX domain handle.
1037
+ * @param attr NVTX payload schema attributes.
1038
+ */
1039
+ NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadSchemaRegister(
1040
+ nvtxDomainHandle_t domain,
1041
+ const nvtxPayloadSchemaAttr_t* attr);
1042
+
1043
+ /**
1044
+ * \brief Register an enumeration type with the payload extension.
1045
+ *
1046
+ * @param domain NVTX domain handle
1047
+ * @param attr NVTX payload enumeration type attributes.
1048
+ */
1049
+ NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadEnumRegister(
1050
+ nvtxDomainHandle_t domain,
1051
+ const nvtxPayloadEnumAttr_t* attr);
1052
+
1053
+ /**
1054
+ * \brief Register a scope.
1055
+ *
1056
+ * @param domain NVTX domain handle
1057
+ * @param attr Scope attributes.
1058
+ *
1059
+ * @return an identifier for the scope. If the operation was not successful,
1060
+ * `NVTX_SCOPE_NONE` is returned.
1061
+ */
1062
+ NVTX_DECLSPEC uint64_t NVTX_API nvtxScopeRegister(
1063
+ nvtxDomainHandle_t domain,
1064
+ const nvtxScopeAttr_t* attr);
1065
+
1066
+ /**
1067
+ * \brief Marks an instantaneous event in the application with the attributes
1068
+ * being passed via the extended payload.
1069
+ *
1070
+ * An NVTX handler can assume that the payload contains the event message.
1071
+ * Otherwise, it might ignore the event.
1072
+ *
1073
+ * @param domain NVTX domain handle
1074
+ * @param payloadData pointer to an array of structured payloads.
1075
+ * @param count number of payload BLOBs.
1076
+ */
1077
+ NVTX_DECLSPEC void NVTX_API nvtxMarkPayload(
1078
+ nvtxDomainHandle_t domain,
1079
+ const nvtxPayloadData_t* payloadData,
1080
+ size_t count);
1081
+
1082
+ /**
1083
+ * \brief Begin a nested thread range with the attributes being passed via the
1084
+ * payload.
1085
+ *
1086
+ * @param domain NVTX domain handle
1087
+ * @param payloadData Pointer to an array of extended payloads.
1088
+ * @param count Number of payloads.
1089
+ *
1090
+ * @return The level of the range being started. If an error occurs a negative
1091
+ * value is returned on the current thread.
1092
+ */
1093
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushPayload(
1094
+ nvtxDomainHandle_t domain,
1095
+ const nvtxPayloadData_t* payloadData,
1096
+ size_t count);
1097
+
1098
+ /**
1099
+ * \brief End a nested thread range with an additional custom payload.
1100
+ *
1101
+ * NVTX event attributes passed to this function (via the payloads) overwrite
1102
+ * event attributes (message and color) that have been set in the push event.
1103
+ * Other payload entries extend the data of the range.
1104
+ *
1105
+ * @param domain NVTX domain handle
1106
+ * @param payloadData pointer to an array of structured payloads.
1107
+ * @param count number of payload BLOBs.
1108
+ *
1109
+ * @return The level of the range being ended. If an error occurs a negative
1110
+ * value is returned on the current thread.
1111
+ */
1112
+ NVTX_DECLSPEC int NVTX_API nvtxRangePopPayload(
1113
+ nvtxDomainHandle_t domain,
1114
+ const nvtxPayloadData_t* payloadData,
1115
+ size_t count);
1116
+
1117
+ /**
1118
+ * \brief Start a thread range with attributes passed via the extended payload.
1119
+ *
1120
+ * @param domain NVTX domain handle
1121
+ * @param payloadData pointer to an array of structured payloads.
1122
+ * @param count number of payload BLOBs.
1123
+ *
1124
+ * @return The correlation ID of the started range, which is passed to
1125
+ * `nvtxRangeEndPayload` to end the range.
1126
+ */
1127
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartPayload(
1128
+ nvtxDomainHandle_t domain,
1129
+ const nvtxPayloadData_t* payloadData,
1130
+ size_t count);
1131
+
1132
+ /**
1133
+ * \brief End a thread range and pass a custom payload.
1134
+ *
1135
+ * NVTX event attributes passed to this function (via the payloads) overwrite
1136
+ * event attributes (message and color) that have been set in the start event.
1137
+ * Other payload entries extend the data of the range.
1138
+ *
1139
+ * @param domain NVTX domain handle
1140
+ * @param id The correlation ID returned from a NVTX range start call.
1141
+ * @param payloadData pointer to an array of structured payloads.
1142
+ * @param count number of payload BLOBs.
1143
+ */
1144
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEndPayload(
1145
+ nvtxDomainHandle_t domain,
1146
+ nvtxRangeId_t id,
1147
+ const nvtxPayloadData_t* payloadData,
1148
+ size_t count);
1149
+
1150
+ /**
1151
+ * \brief Checks if the given NVTX domain is enabled.
1152
+ *
1153
+ * This function can be used to guard expensive code instrumentation.
1154
+ * In general, it is recommended to avoid different execution branches based on
1155
+ * NVTX instrumentation.
1156
+ *
1157
+ * If no tool is attached, this function will always return `0`.
1158
+ * If a tool is attached, but does not handle this function, `1` is returned.
1159
+ * If a tool is attached and handles this function, the return value is
1160
+ * determined by the tool. Positive (>0) return values indicate that the domain
1161
+ * is enabled, `0` indicates that the domain is disabled.
1162
+ *
1163
+ * @param domain NVTX domain handle
1164
+ * @return 0 if the domain is disabled. Values > 0 indicate an enabled domain.
1165
+ */
1166
+ NVTX_DECLSPEC uint8_t NVTX_API nvtxDomainIsEnabled(
1167
+ nvtxDomainHandle_t domain);
1168
+
1169
+ #endif /* NVTX_PAYLOAD_API_FUNCTIONS_V1 */
1170
+
1171
+ #ifndef NVTX_PAYLOAD_API_FUNCTIONS_DEFERRED_V1
1172
+ #define NVTX_PAYLOAD_API_FUNCTIONS_DEFERRED_V1
1173
+
1174
+ /**
1175
+ * Get a timestamp from the NVTX handler or tool. If no tool is attached, the
1176
+ * CPU TSC might be returned. No guarantees are made.
1177
+ * The returned timestamp is just meant to be used in deferred events/counters.
1178
+ */
1179
+ NVTX_DECLSPEC int64_t NVTX_API nvtxTimestampGet(void);
1180
+
1181
+ /**
1182
+ * Register a time domain. Associates an NVTX scope with the time domain.
1183
+ * Timestamps of NVTX events or counters in the scope are interpreted according
1184
+ * to the time domain definitions.
1185
+ *
1186
+ * @param domain NVTX domain handle.
1187
+ * @param timeAttr Time domain attributes (timestamp type, scope, flags, etc.).
1188
+ * @return time domain ID.
1189
+ */
1190
+ NVTX_DECLSPEC uint64_t NVTX_API nvtxTimeDomainRegister(
1191
+ nvtxDomainHandle_t domain,
1192
+ const nvtxTimeDomainAttr_t* timeAttr);
1193
+
1194
+ /**
1195
+ * Provide the pointer to a function that returns a timestamp.
1196
+ * This enables the tool to create time synchronization points.
1197
+ *
1198
+ * @param domain NVTX domain handle.
1199
+ * @param timeDomainId time domain identifier or timestamp type ID, if it is
1200
+ * unambiguous.
1201
+ * @param flags indicates if it is safe to call the timestamp provider after
1202
+ * process teardown.
1203
+ * @param timestampProviderFn Pointer to a function that returns a timestamp.
1204
+ */
1205
+ NVTX_DECLSPEC void NVTX_API nvtxTimerSource(
1206
+ nvtxDomainHandle_t domain,
1207
+ uint64_t timeDomainId,
1208
+ uint64_t flags,
1209
+ int64_t (*timestampProviderFn)(void));
1210
+
1211
+ /**
1212
+ * Same as `nvtxTimerSource`, but with an additional data pointer argument.
1213
+ *
1214
+ * @param domain NVTX domain handle.
1215
+ * @param timeDomainId time domain identifier or timestamp type ID, if it is
1216
+ * unambiguous.
1217
+ * @param flags indicates if it is safe to call the timestamp provider after
1218
+ * process teardown.
1219
+ * @param timestampProviderFn Pointer to a function that returns a timestamp.
1220
+ * @param data Pointer to data that is passed to the timestamp provider function.
1221
+ */
1222
+ NVTX_DECLSPEC void NVTX_API nvtxTimerSourceWithData(
1223
+ nvtxDomainHandle_t domain,
1224
+ uint64_t timeDomainId,
1225
+ uint64_t flags,
1226
+ int64_t (*timestampProviderFn)(void* data),
1227
+ void* data);
1228
+
1229
+ /**
1230
+ * Provides a synchronization point between two time domains.
1231
+ * Two synchronization points are required to enable a timestamp conversion.
1232
+ * The tool must know one of the time domains or at least must be able to chain
1233
+ * conversions to enable the conversion between the given timestamps.
1234
+ *
1235
+ * @param domain NVTX domain handle.
1236
+ * @param timeDomainId1 time domain 1 ID or timestamp type ID, if it is
1237
+ * unambiguous.
1238
+ * @param timeDomainId2 time domain 2 ID or timestamp type ID, if it is
1239
+ * unambiguous.
1240
+ * @param timestamp1 Timestamp in the first time domain.
1241
+ * @param timestamp2 Timestamp in the second time domain.
1242
+ */
1243
+ NVTX_DECLSPEC void NVTX_API nvtxTimeSyncPoint(
1244
+ nvtxDomainHandle_t domain,
1245
+ uint64_t timeDomainId1,
1246
+ uint64_t timeDomainId2,
1247
+ int64_t timestamp1,
1248
+ int64_t timestamp2);
1249
+
1250
+ /**
1251
+ * The same as `nvtxTimeSyncPoint` but with multiple synchronization points.
1252
+ *
1253
+ * @param domain NVTX domain handle.
1254
+ * @param timeDomainIdSrc source time domain ID or timestamp type ID, if it is
1255
+ * unambiguous.
1256
+ * @param timeDomainIdDst destination time domain ID or timestamp type ID, if it
1257
+ * is unambiguous.
1258
+ * @param syncPoints Pointer to an array of synchronization points.
1259
+ * @param count Number of synchronization points.
1260
+ */
1261
+ NVTX_DECLSPEC void NVTX_API nvtxTimeSyncPointTable(
1262
+ nvtxDomainHandle_t domain,
1263
+ uint64_t timeDomainIdSrc,
1264
+ uint64_t timeDomainIdDst,
1265
+ const nvtxSyncPoint_t* syncPoints,
1266
+ size_t count);
1267
+
1268
+ /**
1269
+ * @brief Pass a conversion factor between two time domains to the NVTX handler.
1270
+ *
1271
+ * @param domain NVTX domain handle.
1272
+ * @param timeDomainIdSrc source time domain ID or timestamp type ID, if it is
1273
+ * unambiguous.
1274
+ * @param timeDomainIdDst destination time domain ID or timestamp type ID, if it
1275
+ * is unambiguous.
1276
+ * @param slope Conversion factor between the two time domains.
1277
+ * @param timestampSrc Timestamp in the source time domain.
1278
+ * @param timestampDst Timestamp in the destination time domain.
1279
+ */
1280
+ NVTX_DECLSPEC void NVTX_API nvtxTimestampConversionFactor(
1281
+ nvtxDomainHandle_t domain,
1282
+ uint64_t timeDomainIdSrc,
1283
+ uint64_t timeDomainIdDst,
1284
+ double slope,
1285
+ int64_t timestampSrc,
1286
+ int64_t timestampDst);
1287
+
1288
+ /**
1289
+ * @brief Submit one deferred event.
1290
+ *
1291
+ * @param domain NVTX domain handle.
1292
+ * @param payloadData Pointer to an array of structured payloads.
1293
+ * @param numPayloads Number of payloads of the event.
1294
+ */
1295
+ NVTX_DECLSPEC void NVTX_API nvtxEventSubmit(
1296
+ nvtxDomainHandle_t domain,
1297
+ const nvtxPayloadData_t* payloadData,
1298
+ size_t numPayloads);
1299
+
1300
+ /**
1301
+ * \brief Submit a batch of deferred events in the given domain.
1302
+ *
1303
+ * @param domain NVTX domain handle.
1304
+ * @param eventBatch Pointer to deferred events batch details.
1305
+ */
1306
+ NVTX_DECLSPEC void NVTX_API nvtxEventBatchSubmit(
1307
+ nvtxDomainHandle_t domain,
1308
+ const nvtxEventBatch_t* eventBatch);
1309
+
1310
+ #endif /* NVTX_PAYLOAD_API_FUNCTIONS_DEFERRED_V1 */
1311
+
1312
+ /**
1313
+ * \brief Callback IDs of API functions in the payload extension.
1314
+ *
1315
+ * The NVTX handler can use these values to register a handler function. When
1316
+ * `InitializeInjectionNvtxExtension(nvtxExtModuleInfo_t* moduleInfo)` is
1317
+ * executed, a handler routine can be registered as follows:
1318
+ * \code{.c}
1319
+ * moduleInfo->segments->slots[NVTX3EXT_CBID_nvtxPayloadSchemaRegister] =
1320
+ * (intptr_t)PayloadSchemaRegisterHandlerFn;
1321
+ * \endcode
1322
+ */
1323
+ #ifndef NVTX_PAYLOAD_CALLBACK_ID_V1
1324
+ #define NVTX_PAYLOAD_CALLBACK_ID_V1
1325
+
1326
+ #define NVTX3EXT_CBID_nvtxPayloadSchemaRegister 0
1327
+ #define NVTX3EXT_CBID_nvtxPayloadEnumRegister 1
1328
+ #define NVTX3EXT_CBID_nvtxMarkPayload 2
1329
+ #define NVTX3EXT_CBID_nvtxRangePushPayload 3
1330
+ #define NVTX3EXT_CBID_nvtxRangePopPayload 4
1331
+ #define NVTX3EXT_CBID_nvtxRangeStartPayload 5
1332
+ #define NVTX3EXT_CBID_nvtxRangeEndPayload 6
1333
+ #define NVTX3EXT_CBID_nvtxDomainIsEnabled 7
1334
+ #define NVTX3EXT_CBID_nvtxScopeRegister 12
1335
+
1336
+ #endif /* NVTX_PAYLOAD_CALLBACK_ID_V1 */
1337
+
1338
+ #ifndef NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1
1339
+ #define NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1
1340
+
1341
+ #define NVTX3EXT_CBID_nvtxTimestampGet 8
1342
+ #define NVTX3EXT_CBID_nvtxTimeDomainRegister 9
1343
+ #define NVTX3EXT_CBID_nvtxTimerSource 10
1344
+ #define NVTX3EXT_CBID_nvtxTimerSourceWithData 11
1345
+ #define NVTX3EXT_CBID_nvtxTimeSyncPoint 13
1346
+ #define NVTX3EXT_CBID_nvtxTimeSyncPointTable 14
1347
+ #define NVTX3EXT_CBID_nvtxTimestampConversionFactor 15
1348
+ #define NVTX3EXT_CBID_nvtxEventSubmit 16
1349
+ #define NVTX3EXT_CBID_nvtxEventBatchSubmit 17
1350
+
1351
+ #endif /* NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1 */
1352
+
1353
+ /*** Helper utilities ***/
1354
+
1355
+ /** \brief Helper macro for safe double-cast of pointer to uint64_t value. */
1356
+ #ifndef NVTX_POINTER_AS_PAYLOAD_ULLVALUE
1357
+ # ifdef __cplusplus
1358
+ # define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) \
1359
+ static_cast<uint64_t>(reinterpret_cast<uintptr_t>(p))
1360
+ # else
1361
+ /* C fallback: nested casts, pointer -> uintptr_t -> uint64_t (parentheses balanced). */
+ #define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) (NVTX_STATIC_CAST(uint64_t, NVTX_STATIC_CAST(uintptr_t, p)))
1362
+ # endif
1363
+ #endif
1364
+
1365
+ #ifndef NVTX_PAYLOAD_EVTATTR_SET_DATA
1366
+ /**
1367
+ * \brief Helper macro to attach a single payload to an NVTX event attribute.
1368
+ *
1369
+ * @param evtAttr NVTX event attribute (variable name)
1370
+ * @param pldata_addr Address of `nvtxPayloadData_t` variable.
1371
+ * @param schema_id NVTX binary payload schema ID.
1372
+ * @param pl_addr Address of the (actual) payload.
1373
+ * @param sz size of the (actual) payload.
1374
+ */
1375
+ #define NVTX_PAYLOAD_EVTATTR_SET_DATA(evtAttr, pldata_addr, schema_id, pl_addr, sz) \
1376
+ (pldata_addr)->schemaId = schema_id; \
1377
+ (pldata_addr)->size = sz; \
1378
+ (pldata_addr)->payload = pl_addr; \
1379
+ (evtAttr).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(pldata_addr); \
1380
+ (evtAttr).payloadType = NVTX_PAYLOAD_TYPE_EXT; \
1381
+ (evtAttr).reserved0 = 1;
1382
+ #endif /* NVTX_PAYLOAD_EVTATTR_SET_DATA */
1383
+
1384
+ #ifndef NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE
1385
+ /**
1386
+ * \brief Helper macro to attach multiple payloads to an NVTX event attribute.
1387
+ *
1388
+ * @param evtAttr NVTX event attribute (variable name)
1389
+ * @param pldata Payload data array (of type `nvtxPayloadData_t`)
1390
+ */
1391
+ #define NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE(evtAttr, pldata) \
1392
+ (evtAttr).payloadType = NVTX_PAYLOAD_TYPE_EXT; \
1393
+ (evtAttr).reserved0 = sizeof(pldata)/sizeof(nvtxPayloadData_t); \
1394
+ (evtAttr).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(pldata);
1395
+ #endif /* NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE */
1396
+
1397
+ #ifndef NVTX_PAYLOAD_EVTATTR_SET
1398
+ /*
1399
+ * Do not use this macro directly! It is a helper to attach a single payload to
1400
+ * an NVTX event attribute.
1401
+ * @warning The NVTX push, start or mark operation must not be in an outer scope.
1402
+ */
1403
+ #define NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schema_id, pl_addr, sz) \
1404
+ nvtxPayloadData_t _NVTX_PAYLOAD_DATA_VAR[] = \
1405
+ {{schema_id, sz, pl_addr}}; \
1406
+ (evtAttr)->payload.ullValue = \
1407
+ NVTX_POINTER_AS_PAYLOAD_ULLVALUE(_NVTX_PAYLOAD_DATA_VAR); \
1408
+ (evtAttr)->payloadType = NVTX_PAYLOAD_TYPE_EXT; \
1409
+ (evtAttr)->reserved0 = 1;
1410
+ #endif /* NVTX_PAYLOAD_EVTATTR_SET */
1411
+
1412
+ #ifndef nvtxPayloadRangePush
1413
+ /**
1414
+ * \brief Helper macro to push a range with extended payload.
1415
+ *
1416
+ * @param domain NVTX domain handle
1417
+ * @param evtAttr pointer to NVTX event attribute.
1418
+ * @param schemaId NVTX payload schema ID
1419
+ * @param plAddr Pointer to the binary data (actual payload)
1420
+ * @param size Size of the binary payload data in bytes.
1421
+ */
1422
+ #define nvtxPayloadRangePush(domain, evtAttr, schemaId, plAddr, size) \
1423
+ do { \
1424
+ NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) \
1425
+ nvtxDomainRangePushEx(domain, evtAttr); \
1426
+ } while (0)
1427
+ #endif /* nvtxPayloadRangePush */
1428
+
1429
+ #ifndef nvtxPayloadMark
1430
+ /**
1431
+ * \brief Helper macro to set a marker with extended payload.
1432
+ *
1433
+ * @param domain NVTX domain handle
1434
+ * @param evtAttr pointer to NVTX event attribute.
1435
+ * @param schemaId NVTX payload schema ID
1436
+ * @param plAddr Pointer to the binary data (actual payload)
1437
+ * @param size Size of the binary payload data in bytes.
1438
+ */
1439
+ #define nvtxPayloadMark(domain, evtAttr, schemaId, plAddr, size) \
1440
+ do { \
1441
+ NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) \
1442
+ nvtxDomainMarkEx(domain, evtAttr); \
1443
+ } while (0)
1444
+ #endif /* nvtxPayloadMark */
1445
+
1446
+ /* Macros to create versioned symbols. */
1447
+ #ifndef NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIERS_V1
1448
+ #define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIERS_V1
1449
+ #define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \
1450
+ NAME##_v##VERSION##_bpl##COMPATID
1451
+ #define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \
1452
+ NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID)
1453
+ #define NVTX_EXT_PAYLOAD_VERSIONED_ID(NAME) \
1454
+ NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_PAYLOAD_COMPATID)
1455
+ #endif /* NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIERS_V1 */
1456
+
1457
+ #ifdef __GNUC__
1458
+ #pragma GCC visibility push(internal)
1459
+ #endif
1460
+
1461
+ /* Extension types are required for the implementation and the NVTX handler. */
1462
+ #define NVTX_EXT_TYPES_GUARD
1463
+ #include "nvtxDetail/nvtxExtTypes.h"
1464
+ #undef NVTX_EXT_TYPES_GUARD
1465
+
1466
+ #ifndef NVTX_NO_IMPL
1467
+ #define NVTX_EXT_IMPL_PAYLOAD_GUARD
1468
+ #include "nvtxDetail/nvtxExtImplPayload_v1.h"
1469
+ #undef NVTX_EXT_IMPL_PAYLOAD_GUARD
1470
+ #endif /* NVTX_NO_IMPL */
1471
+
1472
+ #ifdef __GNUC__
1473
+ #pragma GCC visibility pop
1474
+ #endif
1475
+
1476
+ #ifdef __cplusplus
1477
+ }
1478
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayloadHelper.h ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvtxDetail/nvtxExtPayloadHelperInternal.h"
32
+
33
+
34
+ /* This is just an empty marker (for readability), which can be omitted. */
35
+ /* TODO: Fix issue with trailing comma at end of entry list. */
36
+ #define NVTX_PAYLOAD_ENTRIES
37
+
38
+
39
+ /**
40
+ * Use this macro for payload entries that are defined by a schema (nested
41
+ * payload schema).
42
+ */
43
+ #define NVTX_PAYLOAD_NESTED(schemaId) _NVTX_PAYLOAD_NESTED(schemaId)
44
+
45
+
46
+ /**
47
+ * \brief Define a payload schema for an existing C `struct` definition.
48
+ *
49
+ * This macro does
50
+ * 1) create schema description (array of schema entries).
51
+ * 2) set the schema attributes for a static data layout.
52
+ *
53
+ * It can be used in static code or within a function context.
54
+ *
55
+ * Example:
56
+ * NVTX_DEFINE_SCHEMA_FOR_STRUCT(your_struct, "SchemaName",
57
+ * NVTX_PAYLOAD_ENTRIES(
58
+ * (index, TYPE_INT, "integer value"),
59
+ * (dpfloat, TYPE_DOUBLE, "fp64 value"),
60
+ * (text, TYPE_CSTRING, "text", NULL, 24)
61
+ * )
62
+ * )
63
+ *
64
+ * It is required to at least provide the struct name and the payload entries.
65
+ * The first two fields (member name and NVTX entry type) of each payload entry
66
+ * are required.
67
+ *
68
+ * The optional parameters are only allowed to be passed in the predefined order.
69
+ * Hence, `prefix` requires `schema_name` to be given and
70
+ * `schema_flags` requires `prefix` and `schema_name` to be given.
71
+ * The payload entries are always the last parameter. A maximum of 16 schema
72
+ * entries is supported.
73
+ *
74
+ * It is recommended to use `NVTX_PAYLOAD_SCHEMA_REGISTER` to register the schema.
75
+ *
76
+ * @param struct_id The name of the struct.
77
+ * @param schema_name (Optional 1) name of the payload schema. Default is `NULL`.
78
+ * @param prefix (Optional 2) prefix before the schema and attributes variables,
79
+ * e.g. `static const`. Leave this empty, if no prefix is desired.
80
+ * @param schema_flags (Optional 3) flags to augment the payload schema.
81
+ * Default is `NVTX_PAYLOAD_SCHEMA_FLAG_NONE`.
82
+ * @param schema_id (Optional 4) User-defined payload schema ID.
83
+ * @param entries (Mandatory) Payload schema entries. This is always the last
84
+ * parameter to the macro.
85
+ */
86
+ #define NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, ...) \
87
+ _NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, __VA_ARGS__)
88
+
89
+
90
+ /**
91
+ * \brief Define a C struct together with a matching schema.
92
+ *
93
+ * This macro does
94
+ * 1) define the payload type (typedef struct).
95
+ * 2) create schema description (array of schema entries).
96
+ * 3) set the schema attributes for a static data layout.
97
+ *
98
+ * The macro can be used in static code or within a function context.
99
+ *
100
+ * It defines the schema attributes in `struct_id##Attr`. Thus, it is recommended
101
+ * to use `NVTX_PAYLOAD_SCHEMA_REGISTER(domain, struct_id)` to register the schema.
102
+ *
103
+ * Example:
104
+ * NVTX_DEFINE_STRUCT_WITH_SCHEMA(your_struct_name, "Your schema name",
105
+ * NVTX_PAYLOAD_ENTRIES(
106
+ * (int, index, TYPE_INT, "integer value"),
107
+ * (double, dpfloat, TYPE_DOUBLE, "fp64 value"),
108
+ * (const char, (text, 24), TYPE_CSTRING, "text", NULL, 24)
109
+ * )
110
+ * )
111
+ *
112
+ * The first three fields (C type, member, entry type) of each entry are required.
113
+ * A fixed-size array or string requires a special notation with the member
114
+ * name and the size separated by comma and put into brackets (see last entry
115
+ * in the example).
116
+ *
117
+ * The optional parameters are positional (only allowed to be passed in the
118
+ * predefined order). A maximum of 16 schema entries is supported.
119
+ *
120
+ * @param struct_id The name of the struct.
121
+ * @param schema_name (Optional 1) name of the payload schema. Default is `NULL`.
122
+ * @param prefix (Optional 2) prefix before the schema and attributes variables,
123
+ * e.g. `static const`. Leave this empty, if no prefix is desired.
124
+ * @param schema_flags (Optional 3) flags to augment the payload schema.
125
+ * Default is `NVTX_PAYLOAD_SCHEMA_FLAG_NONE`.
126
+ * @param schema_id (Optional 4) User-defined payload schema ID.
127
+ * @param entries (Mandatory) The schema entries. This is always the last
128
+ * parameter to the macro.
129
+ */
130
+ #define NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, ...) \
131
+ _NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, __VA_ARGS__)
132
+
133
+ /**
134
+ * \brief Initialize and register the NVTX binary payload schema.
135
+ *
136
+ * This does essentially the same as `NVTX_DEFINE_STRUCT_WITH_SCHEMA`, but in
137
+ * addition the schema is registered. The schema ID will be defined as follows:
138
+ * `const uint64_t struct_id##_schemaId`.
139
+ *
140
+ * @param domain The NVTX domain handle.
141
+ * All other parameters are similar to `NVTX_DEFINE_STRUCT_WITH_SCHEMA`.
142
+ */
143
+ #define NVTX_DEFINE_STRUCT_WITH_SCHEMA_AND_REGISTER(domain, struct_id, ...) \
144
+ _NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, __VA_ARGS__) \
145
+ const uint64_t struct_id##_schemaId = nvtxPayloadSchemaRegister(domain, &struct_id##Attr);
146
+
147
+ /**
148
+ * \brief Define payload schema for an existing `struct` and register the schema.
149
+ *
150
+ * This does essentially the same as `NVTX_DEFINE_SCHEMA_FOR_STRUCT`, but in
151
+ * addition, the schema is registered and `uint64_t struct_id##_schemaId` set.
152
+ *
153
+ * @param domain The NVTX domain handle.
154
+ * All other parameters are similar to `NVTX_DEFINE_SCHEMA_FOR_STRUCT`.
155
+ */
156
+ #define NVTX_DEFINE_SCHEMA_FOR_STRUCT_AND_REGISTER(domain, struct_id, ...) \
157
+ _NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, __VA_ARGS__) \
158
+ const uint64_t struct_id##_schemaId = nvtxPayloadSchemaRegister(domain, &struct_id##Attr);
159
+
160
+ /**
161
+ * \brief Create a type definition for the given struct ID and members.
162
+ *
163
+ * This is a convenience macro. A normal `typedef` can be used instead.
164
+ *
165
+ * Example usage:
166
+ * NVTX_DEFINE_STRUCT(your_struct,
167
+ * (double, fp64),
168
+ * (uint8_t, u8),
169
+ * (float, fp32[3])
170
+ * )
171
+ *
172
+ * @param struct_id The name of the struct.
173
+ * @param members The members of the struct.
174
+ */
175
+ #define NVTX_DEFINE_STRUCT(struct_id, ...) \
176
+ _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, __VA_ARGS__)
177
+
178
+ /**
179
+ * \brief Register an NVTX binary payload schema.
180
+ *
181
+ * This is a convenience macro, which takes the same `struct_id` that has been
182
+ * used in other helper macros. Instead, `nvtxPayloadSchemaRegister` can also be
183
+ * used, but `&struct_id##Attr` has to be passed.
184
+ *
185
+ * @param domain The NVTX domain handle.
186
+ * @param struct_id The name of the struct.
187
+ *
188
+ * @return NVTX schema ID
189
+ */
190
+ #define NVTX_PAYLOAD_SCHEMA_REGISTER(domain, struct_id) \
191
+ nvtxPayloadSchemaRegister(domain, &struct_id##Attr);
192
+
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsCounters.h ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExtPayload.h"
32
+
33
+ /** Identifier of the semantic extension for counters. */
34
+ #ifndef NVTX_SEMANTIC_ID_COUNTERS_V1
35
+ #define NVTX_SEMANTIC_ID_COUNTERS_V1 5
36
+
37
+ /* Use with the version field of `nvtxSemanticsHeader_t`. */
38
+ #define NVTX_COUNTER_SEMANTIC_VERSION 2
39
+
40
+ /*** Flags to augment the counter value. ***/
41
+ #define NVTX_COUNTER_FLAGS_NONE 0
42
+
43
+ /**
44
+ * Convert the fixed point value to a normalized floating point.
45
+ * Use the signedness of the underlying type this flag is applied to.
46
+ * Unsigned [0f : 1f] or signed [-1f : 1f]
47
+ */
48
+ #define NVTX_COUNTER_FLAG_NORMALIZE (1 << 1)
49
+
50
+ /**
51
+ * Tools should apply scale and limits when graphing, ideally in a "soft" way to
52
+ * see when limits are exceeded.
53
+ */
54
+ #define NVTX_COUNTER_FLAG_LIMIT_MIN (1 << 2)
55
+ #define NVTX_COUNTER_FLAG_LIMIT_MAX (1 << 3)
56
+ #define NVTX_COUNTER_FLAG_LIMITS \
57
+ (NVTX_COUNTER_FLAG_LIMIT_MIN | NVTX_COUNTER_FLAG_LIMIT_MAX)
58
+
59
+ /**
60
+ * Counter value types
61
+ */
62
+ #define NVTX_COUNTER_FLAG_VALUETYPE_ABSOLUTE (1 << 4)
63
+ /* Delta to previous sample, tool-defined if no previous sample is available. */
64
+ #define NVTX_COUNTER_FLAG_VALUETYPE_DELTA (2 << 4)
65
+ #define NVTX_COUNTER_FLAG_VALUETYPE_DELTA_SINCE_START (3 << 4)
66
+
67
+ /**
68
+ * Counter interpolation / effective range of counters.
69
+ */
70
+ /* No interpolation between samples. */
71
+ #define NVTX_COUNTER_FLAG_INTERPOLATION_POINT (1 << 8)
72
+ /* Piecewise constant interpolation between the current and the last sample. */
73
+ #define NVTX_COUNTER_FLAG_INTERPOLATION_SINCE_LAST (2 << 8)
74
+ /* Piecewise constant interpolation between the current and the next sample. */
75
+ #define NVTX_COUNTER_FLAG_INTERPOLATION_UNTIL_NEXT (3 << 8)
76
+ /* Piecewise linear interpolation between samples. */
77
+ #define NVTX_COUNTER_FLAG_INTERPOLATION_LINEAR (4 << 8)
78
+
79
+ /**
80
+ * Datatype for limits union (value of `limitType`).
81
+ */
82
+ #define NVTX_COUNTER_LIMIT_UNDEFINED 0
83
+ #define NVTX_COUNTER_LIMIT_I64 1
84
+ #define NVTX_COUNTER_LIMIT_U64 2
85
+ #define NVTX_COUNTER_LIMIT_F64 3
86
+
87
+
88
+ /**
89
+ * Union of datatypes that can be used as counter value limits.
90
+ */
91
+ typedef union
92
+ {
93
+ int64_t i64;
94
+ uint64_t u64;
95
+ double f64;
96
+ } nvtxCounterLimit_t;
97
+
98
+ /**
99
+ * \brief Specify additional properties of a counter or counter group.
100
+ */
101
+ typedef struct nvtxSemanticsCounter_v1
102
+ {
103
+ /** Header of the semantic extension (with identifier, version, etc.). */
104
+ struct nvtxSemanticsHeader_v1 header;
105
+
106
+ /**
107
+ * Flag if normalization, scale limits, etc. should be applied to counter
108
+ * values.
109
+ */
110
+ uint64_t flags;
111
+
112
+ /** Unit of the counter value (case insensitive) */
113
+ const char* unit;
114
+
115
+ /** Should be 1 if not used. */
116
+ uint64_t unitScaleNumerator;
117
+
118
+ /** Should be 1 if not used. */
119
+ uint64_t unitScaleDenominator;
120
+
121
+ /**
122
+ * Specifies the used union member for `min` and `max`.
123
+ * Use the defines `NVTX_COUNTER_LIMIT_*`.
124
+ */
125
+ int64_t limitType;
126
+
127
+ /** Value limits. */
128
+ nvtxCounterLimit_t min;
129
+ nvtxCounterLimit_t max;
130
+ } nvtxSemanticsCounter_t;
131
+
132
+ #endif /* NVTX_SEMANTIC_ID_COUNTERS_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsScope.h ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExtPayload.h"
32
+
33
+ #ifndef NVTX_SEMANTIC_ID_SCOPE_V1
34
+ #define NVTX_SEMANTIC_ID_SCOPE_V1 1
35
+
36
+ /**
37
+ * \brief Specify the NVTX scope for a payload entry.
38
+ *
39
+ * This allows the scope to be set for a specific value or counter in a payload.
40
+ * The scope must be known at schema registration time.
41
+ */
42
+ typedef struct nvtxSemanticsScope_v1
43
+ {
44
+ struct nvtxSemanticsHeader_v1 header;
45
+
46
+ /** Specifies the scope of a payload entry, e.g. a counter or timestamp. */
47
+ uint64_t scopeId;
48
+ } nvtxSemanticsScope_t;
49
+
50
+ #endif /* NVTX_SEMANTIC_ID_SCOPE_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsTime.h ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExtPayload.h"
32
+
33
+ /** Identifier of the semantic extension for timestamps. */
34
+ #ifndef NVTX_SEMANTIC_ID_TIME_V1
35
+ #define NVTX_SEMANTIC_ID_TIME_V1 2
36
+
37
+ /* Use with the version field of `nvtxSemanticsHeader_t`. */
38
+ #define NVTX_TIME_SEMANTIC_VERSION 1
39
+
40
+ /** Semantic extension specifying timestamp properties. */
41
+ typedef struct nvtxSemanticsTime_v1
42
+ {
43
+ struct nvtxSemanticsHeader_v1 header;
44
+
45
+ /** Time domain ID or predefined `NVTX_TIMESTAMP_TYPE_*`. */
46
+ uint64_t timeDomainId;
47
+ } nvtxSemanticsTime_t;
48
+
49
+ #endif /* NVTX_SEMANTIC_ID_TIME_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSync.h ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #include "nvToolsExt.h"
32
+
33
+ #ifndef NVTOOLSEXT_SYNC_V3
34
+ #define NVTOOLSEXT_SYNC_V3
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif /* __cplusplus */
39
+
40
+ /* \cond SHOW_HIDDEN
41
+ * \version NVTX_VERSION_2
42
+ */
43
+ #define NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (NVTX_STATIC_CAST(uint16_t, sizeof(nvtxSyncUserAttributes_v0)))
44
+ /** \endcond */
45
+
46
+
47
+ /**
48
+ * \page PAGE_SYNCHRONIZATION Synchronization
49
+ *
50
+ * This section covers a subset of the API that allows users to track additional
51
+ * synchronization details of their application. Naming OS synchronization primitives
52
+ * may allow users to better understand the data collected by traced synchronization
53
+ * APIs. Additionally, a user defined synchronization object can allow the users to
54
+ * tell the tools when the user is building their own synchronization system
55
+ * that does not rely on the OS to provide behaviors and instead uses techniques like
56
+ * atomic operations and spinlocks.
57
+ *
58
+ * See module \ref SYNCHRONIZATION for details.
59
+ *
60
+ * \par Example
61
+ * Instrument a mutex class:
62
+ * \code
63
+ * class MyMutex
64
+ * {
65
+ * volatile long bLocked;
66
+ * nvtxSyncUser_t hSync;
67
+ * public:
68
+ * MyMutex(const char* name, nvtxDomainHandle_t d) {
69
+ * bLocked = 0;
70
+ *
71
+ * nvtxSyncUserAttributes_t attribs = { 0 };
72
+ * attribs.version = NVTX_VERSION;
73
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
74
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
75
+ * attribs.message.ascii = name;
76
+ * hSync = nvtxDomainSyncUserCreate(d, &attribs);
77
+ * }
78
+ *
79
+ * ~MyMutex() {
80
+ * nvtxDomainSyncUserDestroy(hSync);
81
+ * }
82
+ *
83
+ * bool Lock() {
84
+ * nvtxDomainSyncUserAcquireStart(hSync);
85
+ * bool acquired = __sync_bool_compare_and_swap(&bLocked, 0, 1); // atomic compiler intrinsic
86
+ *
87
+ * if (acquired) {
88
+ * nvtxDomainSyncUserAcquireSuccess(hSync);
89
+ * }
90
+ * else {
91
+ * nvtxDomainSyncUserAcquireFailed(hSync);
92
+ * }
93
+ * return acquired;
94
+ * }
95
+ *
96
+ * void Unlock() {
97
+ * nvtxDomainSyncUserReleasing(hSync);
98
+ * bLocked = false;
99
+ * }
100
+ * };
101
+ * \endcode
102
+ *
103
+ * \version NVTX_VERSION_2
104
+ */
105
+
106
+ /* ------------------------------------------------------------------------- */
107
+ /* \cond SHOW_HIDDEN
108
+ * \brief Used to build a non-colliding value for resource types separated class
109
+ * \version NVTX_VERSION_2
110
+ */
111
+ #define NVTX_RESOURCE_CLASS_SYNC_OS 2 /**< Synchronization objects that are OS specific. */
112
+ #define NVTX_RESOURCE_CLASS_SYNC_PTHREAD 3 /**< Synchronization objects that are from the POSIX Threads API (pthread)*/
113
+ /** \endcond */
114
+
115
+
116
+ /* ------------------------------------------------------------------------- */
117
+ /** \defgroup SYNCHRONIZATION Synchronization
118
+ * See page \ref PAGE_SYNCHRONIZATION.
119
+ * @{
120
+ */
121
+
122
+ /** \brief Resource type values for OSs with POSIX Thread API support
123
+ */
124
+ typedef enum nvtxResourceSyncPosixThreadType_t
125
+ {
126
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 1), /* pthread_mutex_t */
127
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_CONDITION = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 2), /* pthread_cond_t */
128
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_RWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 3), /* pthread_rwlock_t */
129
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_BARRIER = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 4), /* pthread_barrier_t */
130
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 5), /* pthread_spinlock_t */
131
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_ONCE = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 6) /* pthread_once_t */
132
+ } nvtxResourceSyncPosixThreadType_t;
133
+
134
+ /** \brief Resource type values for Windows OSs
135
+ */
136
+ typedef enum nvtxResourceSyncWindowsType_t
137
+ {
138
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
139
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
140
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_EVENT = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
141
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_CRITICAL_SECTION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
142
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SRWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5)
143
+ } nvtxResourceSyncWindowsType_t;
144
+
145
+ /** \brief Resource type values for Linux and Linux derived OSs such as Android
146
+ * \sa
147
+ * ::nvtxResourceSyncPosixThreadType_t
148
+ */
149
+ typedef enum nvtxResourceSyncLinuxType_t
150
+ {
151
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
152
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_FUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
153
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
154
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_COMPLETION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
155
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5),
156
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SEQLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 6),
157
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_RCU = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 7)
158
+ } nvtxResourceSyncLinuxType_t;
159
+
160
+ /** \brief Resource type values for Android come from Linux.
161
+ * \sa
162
+ * ::nvtxResourceSyncLinuxType_t
163
+ * ::nvtxResourceSyncPosixThreadType_t
164
+ */
165
+ typedef enum nvtxResourceSyncLinuxType_t nvtxResourceSyncAndroidType_t;
166
+
167
+ /** \brief User Defined Synchronization Object Handle.
168
+ * \anchor SYNCUSER_HANDLE_STRUCTURE
169
+ *
170
+ * This structure is opaque to the user and is used as a handle to reference
171
+ * a user defined synchronization object. The tools will return a pointer through the API for the application
172
+ * to hold on its behalf to reference the synchronization object in the future.
173
+ *
174
+ */
175
+ typedef struct nvtxSyncUser* nvtxSyncUser_t;
176
+
177
+ /** \brief User Defined Synchronization Object Attributes Structure.
178
+ * \anchor USERDEF_SYNC_ATTRIBUTES_STRUCTURE
179
+ *
180
+ * This structure is used to describe the attributes of a user defined synchronization
181
+ * object. The layout of the structure is defined by a specific version of the tools
182
+ * extension library and can change between different versions of the Tools Extension
183
+ * library.
184
+ *
185
+ * \par Guidelines
186
+ * The caller should always perform the following three tasks when using
187
+ * attributes:
188
+ * <ul>
189
+ * <li>Zero the structure
190
+ * <li>Set the version field
191
+ * <li>Set the size field
192
+ * </ul>
193
+ *
194
+ * Zeroing the structure sets all the event attributes types and values
195
+ * to the default value.
196
+ *
197
+ * The version and size field are used by the Tools Extension
198
+ * implementation to handle multiple versions of the attributes structure.
199
+ *
200
+ * It is recommended that the caller use one of the following two methods
201
+ * to initialize the event attributes structure:
202
+ *
203
+ * \par Method 1
204
+ * Initializing nvtxSyncUserAttributes_t for future compatibility:
205
+ * \code
206
+ * nvtxSyncUserAttributes_t attribs = {0};
207
+ * attribs.version = NVTX_VERSION;
208
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
209
+ * \endcode
210
+ *
211
+ * \par Method 2
212
+ * Initializing nvtxSyncUserAttributes_t for a specific version:
213
+ * \code
214
+ * nvtxSyncUserAttributes_t attribs = {0};
215
+ * attribs.version = 1;
216
+ * attribs.size = (uint16_t)(sizeof(nvtxSyncUserAttributes_t));
217
+ * \endcode
218
+ *
219
+ * If the caller uses Method 1 it is critical that the entire binary
220
+ * layout of the structure be configured to 0 so that all fields
221
+ * are initialized to the default value.
222
+ *
223
+ * The caller should either use both NVTX_VERSION and
224
+ * NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
225
+ * and a versioned type (Method 2). Using a mix of the two methods
226
+ * will likely cause either source level incompatibility or binary
227
+ * incompatibility in the future.
228
+ *
229
+ * \par Example
230
+ * Populate a sync attributes structure:
231
+ * \code
232
+ * // Initialize
233
+ * nvtxSyncUserAttributes_t attribs = {0};
234
+ * attribs.version = NVTX_VERSION;
235
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
236
+ *
237
+ * // Configure the Attributes
238
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
239
+ * attribs.message.ascii = "Example";
240
+ * \endcode
241
+ *
242
+ * \sa
243
+ * ::nvtxDomainSyncUserCreate
244
+ */
245
+ typedef struct nvtxSyncUserAttributes_v0
246
+ {
247
+ /**
248
+ * \brief Version flag of the structure.
249
+ *
250
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
251
+ * supported in this header file. This can optionally be overridden to
252
+ * another version of the tools extension library.
253
+ */
254
+ uint16_t version;
255
+
256
+ /**
257
+ * \brief Size of the structure.
258
+ *
259
+ * Needs to be set to the size in bytes of this attributes
260
+ * structure, e.g. NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE.
261
+ */
262
+ uint16_t size;
263
+
264
+ /** \brief Message type specified in this attribute structure.
265
+ *
266
+ * Defines the message format of the attribute structure's \ref nvtxSyncUserAttributes_v0::message
267
+ * "message" field.
268
+ *
269
+ * Default Value is NVTX_MESSAGE_UNKNOWN
270
+ */
271
+ int32_t messageType; /* nvtxMessageType_t */
272
+
273
+ /** \brief Message assigned to this attribute structure.
274
+ *
275
+ * The text message that is attached to an event.
276
+ */
277
+ nvtxMessageValue_t message;
278
+
279
+ } nvtxSyncUserAttributes_v0;
280
+
281
+ typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
282
+
283
+ /* ------------------------------------------------------------------------- */
284
+ /** \brief Create a user defined synchronization object
285
+ * This is used to track non-OS synchronization working with spinlocks and atomics
286
+ *
287
+ * \param domain - Domain to own the resource
288
+ * \param attribs - A structure to assign multiple attributes to the object.
289
+ *
290
+ * \return A handle that represents the newly created user defined synchronization object.
291
+ *
292
+ * \sa
293
+ * ::nvtxDomainSyncUserCreate
294
+ * ::nvtxDomainSyncUserDestroy
295
+ * ::nvtxDomainSyncUserAcquireStart
296
+ * ::nvtxDomainSyncUserAcquireFailed
297
+ * ::nvtxDomainSyncUserAcquireSuccess
298
+ * ::nvtxDomainSyncUserReleasing
299
+ *
300
+ * \version NVTX_VERSION_2
301
+ */
302
+ NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
303
+
304
+ /* ------------------------------------------------------------------------- */
305
+ /** \brief Destroy a user defined synchronization object
306
+ * This is used to track non-OS synchronization working with spinlocks and atomics
307
+ *
308
+ * \param handle - A handle to the object to operate on.
309
+ *
310
+ * \sa
311
+ * ::nvtxDomainSyncUserCreate
312
+ * ::nvtxDomainSyncUserDestroy
313
+ * ::nvtxDomainSyncUserAcquireStart
314
+ * ::nvtxDomainSyncUserAcquireFailed
315
+ * ::nvtxDomainSyncUserAcquireSuccess
316
+ * ::nvtxDomainSyncUserReleasing
317
+ *
318
+ * \version NVTX_VERSION_2
319
+ */
320
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle);
321
+
322
+ /* ------------------------------------------------------------------------- */
323
+ /** \brief Signal to tools that an attempt to acquire a user defined synchronization object is starting
324
+ *
325
+ * \param handle - A handle to the object to operate on.
326
+ *
327
+ * \sa
328
+ * ::nvtxDomainSyncUserCreate
329
+ * ::nvtxDomainSyncUserDestroy
330
+ * ::nvtxDomainSyncUserAcquireStart
331
+ * ::nvtxDomainSyncUserAcquireFailed
332
+ * ::nvtxDomainSyncUserAcquireSuccess
333
+ * ::nvtxDomainSyncUserReleasing
334
+ *
335
+ * \version NVTX_VERSION_2
336
+ */
337
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle);
338
+
339
+ /* ------------------------------------------------------------------------- */
340
+ /** \brief Signal to tools of failure in acquiring a user defined synchronization object
341
+ * This should be called after \ref nvtxDomainSyncUserAcquireStart
342
+ *
343
+ * \param handle - A handle to the object to operate on.
344
+ *
345
+ * \sa
346
+ * ::nvtxDomainSyncUserCreate
347
+ * ::nvtxDomainSyncUserDestroy
348
+ * ::nvtxDomainSyncUserAcquireStart
349
+ * ::nvtxDomainSyncUserAcquireFailed
350
+ * ::nvtxDomainSyncUserAcquireSuccess
351
+ * ::nvtxDomainSyncUserReleasing
352
+ *
353
+ * \version NVTX_VERSION_2
354
+ */
355
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle);
356
+
357
+ /* ------------------------------------------------------------------------- */
358
+ /** \brief Signal to tools of success in acquiring a user defined synchronization object
359
+ * This should be called after \ref nvtxDomainSyncUserAcquireStart.
360
+ *
361
+ * \param handle - A handle to the object to operate on.
362
+ *
363
+ * \sa
364
+ * ::nvtxDomainSyncUserCreate
365
+ * ::nvtxDomainSyncUserDestroy
366
+ * ::nvtxDomainSyncUserAcquireStart
367
+ * ::nvtxDomainSyncUserAcquireFailed
368
+ * ::nvtxDomainSyncUserAcquireSuccess
369
+ * ::nvtxDomainSyncUserReleasing
370
+ *
371
+ * \version NVTX_VERSION_2
372
+ */
373
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle);
374
+
375
+ /* ------------------------------------------------------------------------- */
376
+ /** \brief Signal to tools of releasing a reservation on a user defined synchronization object
377
+ * This should be called after \ref nvtxDomainSyncUserAcquireSuccess.
378
+ *
379
+ * \param handle - A handle to the object to operate on.
380
+ *
381
+ * \sa
382
+ * ::nvtxDomainSyncUserCreate
383
+ * ::nvtxDomainSyncUserDestroy
384
+ * ::nvtxDomainSyncUserAcquireStart
385
+ * ::nvtxDomainSyncUserAcquireFailed
386
+ * ::nvtxDomainSyncUserAcquireSuccess
387
+ * ::nvtxDomainSyncUserReleasing
388
+ *
389
+ * \version NVTX_VERSION_2
390
+ */
391
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle);
392
+
393
+
394
+ /** @} */ /*END defgroup*/
395
+
396
+ #ifdef __cplusplus
397
+ }
398
+ #endif /* __cplusplus */
399
+
400
+ #ifndef NVTX_NO_IMPL
401
+ #define NVTX_IMPL_GUARD_SYNC /* Ensure other headers cannot be included directly */
402
+ #include "nvtxDetail/nvtxImplSync_v3.h"
403
+ #undef NVTX_IMPL_GUARD_SYNC
404
+ #endif /*NVTX_NO_IMPL*/
405
+
406
+ #endif /* NVTOOLSEXT_SYNC_V3 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtx3.hpp ADDED
The diff for this file is too large to render. See raw diff
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtHelperMacros.h ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #ifndef NVTX_EXT_HELPER_MACROS_H
32
+ #define NVTX_EXT_HELPER_MACROS_H
33
+
34
+ #if !defined(NVTX_NULLPTR)
35
+ #if defined(__cplusplus) && __cplusplus >= 201103L
36
+ #define NVTX_NULLPTR nullptr
37
+ #else
38
+ #define NVTX_NULLPTR NULL
39
+ #endif
40
+ #endif
41
+
42
+ /* Combine tokens */
43
+ #define _NVTX_EXT_CONCAT(a, b) a##b
44
+ #define NVTX_EXT_CONCAT(a, b) _NVTX_EXT_CONCAT(a, b)
45
+
46
+ /* Resolves to the number of arguments passed. Valid for 1 to 15 arguments; an empty argument list also resolves to 1. */
47
+ #define NVTX_EXT_NUM_ARGS(...) \
48
+ NVTX_EXT_SELECTA16(__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, throwaway)
49
+ #define NVTX_EXT_SELECTA16(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, ...) a16
50
+
51
+ /* Cast argument(s) to void to prevent unused variable warnings. */
52
+ #define _NVTX_EXT_VOIDIFY0()
53
+ #define _NVTX_EXT_VOIDIFY1(a1) (void)a1;
54
+ #define _NVTX_EXT_VOIDIFY2(a1, a2) (void)a1; (void)a2;
55
+ #define _NVTX_EXT_VOIDIFY3(a1, a2, a3) (void)a1; (void)a2; (void)a3;
56
+ #define _NVTX_EXT_VOIDIFY4(a1, a2, a3, a4) (void)a1; (void)a2; (void)a3; (void)a4;
57
+ #define _NVTX_EXT_VOIDIFY5(a1, a2, a3, a4, a5) (void)a1; (void)a2; (void)a3; (void)a4; (void)a5;
58
+ #define _NVTX_EXT_VOIDIFY6(a1, a2, a3, a4, a5, a6) (void)a1; (void)a2; (void)a3; (void)a4; (void)a5; (void)a6;
59
+
60
+ /* Mark function arguments as unused. */
61
+ #define NVTX_EXT_HELPER_UNUSED_ARGS(...) \
62
+ NVTX_EXT_CONCAT(_NVTX_EXT_VOIDIFY, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
63
+
64
+ #endif /* NVTX_EXT_HELPER_MACROS_H */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImpl.h ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_IMPL_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #ifndef NVTX_EXT_IMPL_H
36
+ #define NVTX_EXT_IMPL_H
37
+
38
+ #include <stdlib.h>
39
+ #include <stdio.h>
40
+ #include <string.h>
41
+ #include <wchar.h>
42
+
43
+ /* ---- Include required platform headers ---- */
44
+
45
+ #if defined(_WIN32)
46
+
47
+ #include <windows.h>
48
+
49
+ #else
50
+ #include <unistd.h>
51
+
52
+ #if defined(__ANDROID__)
53
+ #include <android/api-level.h>
54
+ #endif
55
+
56
+ #if defined(__linux__) || defined(__CYGWIN__)
57
+ #include <sched.h>
58
+ #endif
59
+
60
+ #include <sys/types.h>
61
+ #include <limits.h>
62
+ #include <dlfcn.h>
63
+ #include <fcntl.h>
64
+ #include <errno.h>
65
+ #include <pthread.h>
66
+
67
+ #endif
68
+
69
+ /* ---- Define macros used in this file ---- */
70
+
71
+ #ifdef NVTX_DEBUG_PRINT
72
+ #ifdef __ANDROID__
73
+ #include <android/log.h>
74
+ #define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__);
75
+ #define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__);
76
+ #else
77
+ #include <stdio.h>
78
+ #define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
79
+ #define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
80
+ #endif
81
+ #else /* !defined(NVTX_DEBUG_PRINT) */
82
+ #define NVTX_ERR(...)
83
+ #define NVTX_INFO(...)
84
+ #endif
85
+
86
+ #ifdef __cplusplus
87
+ extern "C" {
88
+ #endif /* __cplusplus */
89
+
90
+ #ifdef __GNUC__
91
+ #pragma GCC visibility push(hidden)
92
+ #endif
93
+
94
+ #define NVTX_EXTENSION_FRESH 0 /* Uninitialized extension or function slot */
95
+ #define NVTX_EXTENSION_DISABLED 1 /* Disabled extension or function slot */
96
+ #define NVTX_EXTENSION_STARTING 2 /* Extension is being initialized. */
97
+ #define NVTX_EXTENSION_LOADED 3 /* Extension is initialized successfully. */
98
+ #define NVTX_EXTENSION_INIT_FN_FAILED 4 /* Extension init function returned failure. */
99
+
100
+ /* Function slots are local to each extension */
101
+ typedef struct nvtxExtGlobals1_t
102
+ {
103
+ NvtxExtInitializeInjectionFunc_t injectionFnPtr;
104
+ } nvtxExtGlobals1_t;
105
+
106
+ NVTX_LINKONCE_DEFINE_GLOBAL nvtxExtGlobals1_t NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1) =
107
+ {
108
+ NVTX_NULLPTR
109
+ };
110
+
111
+ #define NVTX_EXT_INIT_GUARD
112
+ #include "nvtxExtInit.h"
113
+ #undef NVTX_EXT_INIT_GUARD
114
+
115
+ #ifdef __GNUC__
116
+ #pragma GCC visibility pop
117
+ #endif
118
+
119
+ #ifdef __cplusplus
120
+ } /* extern "C" */
121
+ #endif /* __cplusplus */
122
+
123
+ #endif /* NVTX_EXT_IMPL_H */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplCounters_v1.h ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_IMPL_COUNTERS_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtCounters.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #define NVTX_EXT_IMPL_GUARD
36
+ #include "nvtxExtImpl.h"
37
+ #undef NVTX_EXT_IMPL_GUARD
38
+
39
+ #ifndef NVTX_EXT_IMPL_COUNTERS_V1
40
+ #define NVTX_EXT_IMPL_COUNTERS_V1
41
+
42
+ #ifdef __cplusplus
43
+ extern "C" {
44
+ #endif /* __cplusplus */
45
+
46
+ #ifdef NVTX_DISABLE
47
+
48
+ #include "nvtxExtHelperMacros.h"
49
+
50
+ #define NVTX_EXT_COUNTERS_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
51
+ NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
52
+ NVTX_SET_NAME_MANGLING_OPTIONS \
53
+ NVTX_EXT_HELPER_UNUSED_ARGS arg_names \
54
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) \
55
+ }
56
+
57
+ #else /* NVTX_DISABLE */
58
+
59
+ /*
60
+ * Function slots for the counters extension. First entry is the module state,
61
+ * initialized to `0` (`NVTX_EXTENSION_FRESH`).
62
+ */
63
+ #define NVTX_EXT_COUNTERS_SLOT_COUNT 63
64
+
65
+ NVTX_LINKONCE_DEFINE_GLOBAL intptr_t
66
+ NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots)[NVTX_EXT_COUNTERS_SLOT_COUNT + 1]
67
+ = {0};
68
+
69
+ /* Avoid warnings about missing prototype. */
70
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersInitOnce)(void);
71
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersInitOnce)(void)
72
+ {
73
+ intptr_t* fnSlots = NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots) + 1;
74
+ nvtxExtModuleSegment_t segment = {
75
+ 0, /* unused (only one segment) */
76
+ NVTX_EXT_COUNTERS_SLOT_COUNT,
77
+ NVTX_NULLPTR /* function slots */
78
+ };
79
+
80
+ nvtxExtModuleInfo_t module_info = {
81
+ NVTX_VERSION, sizeof(nvtxExtModuleInfo_t),
82
+ NVTX_EXT_COUNTERS_MODULEID, NVTX_EXT_COUNTERS_COMPATID,
83
+ 1, NVTX_NULLPTR, /* number of segments, segments */
84
+ NVTX_NULLPTR, /* no export function needed */
85
+ NVTX_NULLPTR /* no extension private info */
86
+ };
87
+
88
+ segment.functionSlots = fnSlots;
89
+ module_info.segments = &segment;
90
+
91
+ NVTX_INFO( "%s\n", __FUNCTION__ );
92
+
93
+ NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&module_info,
94
+ NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots));
95
+ }
96
+
97
+ #define NVTX_EXT_COUNTERS_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
98
+ typedef ret_type (*fn_name##_impl_fntype)signature; \
99
+ NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
100
+ NVTX_SET_NAME_MANGLING_OPTIONS \
101
+ intptr_t* pSlot = &NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots)[NVTX3EXT_CBID_##fn_name + 1]; \
102
+ intptr_t slot = *pSlot; \
103
+ if (slot != NVTX_EXTENSION_DISABLED) { \
104
+ if (slot != NVTX_EXTENSION_FRESH) { \
105
+ NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
106
+ } else { \
107
+ NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersInitOnce)(); \
108
+ /* Re-read function slot after extension initialization. */ \
109
+ slot = *pSlot; \
110
+ if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \
111
+ NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
112
+ } \
113
+ } \
114
+ } \
115
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) /* No tool attached. */ \
116
+ }
117
+
118
+ #endif /* NVTX_DISABLE */
119
+
120
+ /* Non-void functions. */
121
+ #define NVTX_EXT_FN_RETURN return
122
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_STATIC_CAST(rtype, 0);
123
+
124
+ NVTX_EXT_COUNTERS_IMPL_FN_V1(uint64_t, nvtxCounterRegister,
125
+ (nvtxDomainHandle_t domain, const nvtxCounterAttr_t* attr),
126
+ (domain, attr))
127
+
128
+ #undef NVTX_EXT_FN_RETURN
129
+ #undef NVTX_EXT_FN_RETURN_INVALID
130
+ /* END: Non-void functions. */
131
+
132
+ /* void functions. */
133
+ #define NVTX_EXT_FN_RETURN
134
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype)
135
+
136
+ NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSampleInt64,
137
+ (nvtxDomainHandle_t domain, uint64_t counterId, int64_t value),
138
+ (domain, counterId, value))
139
+
140
+ NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSampleFloat64,
141
+ (nvtxDomainHandle_t domain, uint64_t counterId, double value),
142
+ (domain, counterId, value))
143
+
144
+ NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSample,
145
+ (nvtxDomainHandle_t domain, uint64_t counterId, const void* values, size_t size),
146
+ (domain, counterId, values, size))
147
+
148
+ NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSampleNoValue,
149
+ (nvtxDomainHandle_t domain, uint64_t counterId, uint8_t reason),
150
+ (domain, counterId, reason))
151
+
152
+ NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterBatchSubmit,
153
+ (nvtxDomainHandle_t domain, const nvtxCounterBatch_t* counterData),
154
+ (domain, counterData))
155
+
156
+ #undef NVTX_EXT_FN_RETURN
157
+ #undef NVTX_EXT_FN_RETURN_INVALID
158
+ /* END: void functions. */
159
+
160
+ /* Keep NVTX_EXT_COUNTERS_IMPL_FN_V1 defined for a future version of this extension. */
161
+
162
+ #ifdef __cplusplus
163
+ } /* extern "C" */
164
+ #endif /* __cplusplus */
165
+
166
+ #endif /* NVTX_EXT_IMPL_COUNTERS_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMemCudaRt_v1.h ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_IMPL_MEM_CUDART_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtMemCudaRt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #ifndef NVTX_EXT_IMPL_MEM_CUDART_V1
36
+ #define NVTX_EXT_IMPL_MEM_CUDART_V1
37
+
38
+ #ifdef __cplusplus
39
+ extern "C" {
40
+ #endif /* __cplusplus */
41
+
42
+ /* NVTX_EXT_MEM_IMPL_FN_V1 defined in nvtxExtImplMem_v1.h */
43
+
44
+ /* Non-void functions. */
45
+ #define NVTX_EXT_FN_RETURN return
46
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_NULLPTR;
47
+
48
+ NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemPermissionsHandle_t, nvtxMemCudaGetProcessWidePermissions, (nvtxDomainHandle_t domain), (domain))
49
+
50
+ NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemPermissionsHandle_t, nvtxMemCudaGetDeviceWidePermissions, (nvtxDomainHandle_t domain, int device), (domain, device))
51
+
52
+ #undef NVTX_EXT_FN_RETURN
53
+ #undef NVTX_EXT_FN_RETURN_INVALID
54
+ /* END: Non-void functions. */
55
+
56
+ /* void functions. */
57
+ #define NVTX_EXT_FN_RETURN
58
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype)
59
+
60
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemCudaSetPeerAccess, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions, int devicePeer, uint32_t flags), (domain, permissions, devicePeer, flags))
61
+
62
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemCudaMarkInitialized, (nvtxDomainHandle_t domain, cudaStream_t stream, uint8_t isPerThreadStream, nvtxMemMarkInitializedBatch_t const* desc), (domain, stream, isPerThreadStream, desc))
63
+
64
+ #undef NVTX_EXT_FN_RETURN
65
+ #undef NVTX_EXT_FN_RETURN_INVALID
66
+ /* END: void functions. */
67
+
68
+ #ifdef __cplusplus
69
+ } /* extern "C" */
70
+ #endif /* __cplusplus */
71
+
72
+ #endif /* NVTX_EXT_IMPL_MEM_CUDART_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMem_v1.h ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_IMPL_MEM_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtMem.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #define NVTX_EXT_IMPL_GUARD
36
+ #include "nvtxExtImpl.h"
37
+ #undef NVTX_EXT_IMPL_GUARD
38
+
39
+ #ifndef NVTX_EXT_IMPL_MEM_V1
40
+ #define NVTX_EXT_IMPL_MEM_V1
41
+
42
+ #ifdef __cplusplus
43
+ extern "C" {
44
+ #endif /* __cplusplus */
45
+
46
+ #ifdef NVTX_DISABLE
47
+
48
+ #include "nvtxExtHelperMacros.h"
49
+
50
+ #define NVTX_EXT_MEM_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
51
+ NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
52
+ NVTX_SET_NAME_MANGLING_OPTIONS \
53
+ NVTX_EXT_HELPER_UNUSED_ARGS arg_names \
54
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) \
55
+ }
56
+
57
+ #else /* NVTX_DISABLE */
58
+
59
+ /*
60
+ * Function slots for the memory extension. First entry is the module
61
+ * state, initialized to `0` (`NVTX_EXTENSION_FRESH`).
62
+ */
63
+ #define NVTX_EXT_MEM_SLOT_COUNT 63
64
+
65
+ NVTX_LINKONCE_DEFINE_GLOBAL intptr_t
66
+ NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots)[NVTX_EXT_MEM_SLOT_COUNT + 1]
67
+ = {0};
68
+
69
+ /* Avoid warnings about missing prototype. */
70
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemInitOnce)(void);
71
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemInitOnce)(void)
72
+ {
73
+ intptr_t* fnSlots = NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots) + 1;
74
+ nvtxExtModuleSegment_t segment = {
75
+ 1, /* only one segment, hard-code ID */
76
+ NVTX_EXT_MEM_SLOT_COUNT,
77
+ NVTX_NULLPTR /* function slots */
78
+ };
79
+
80
+ nvtxExtModuleInfo_t module_info = {
81
+ NVTX_VERSION, sizeof(nvtxExtModuleInfo_t),
82
+ NVTX_EXT_MODULEID_MEM, NVTX_EXT_COMPATID_MEM,
83
+ 1, NVTX_NULLPTR, /* number of segments, segments */
84
+ NVTX_NULLPTR, /* no export function needed */
85
+ NVTX_NULLPTR /* no extension private info */
86
+ };
87
+
88
+ segment.functionSlots = fnSlots;
89
+ module_info.segments = &segment;
90
+
91
+ NVTX_INFO( "%s\n", __FUNCTION__ );
92
+
93
+ NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&module_info,
94
+ NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots));
95
+ }
96
+
97
+ #define NVTX_EXT_MEM_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
98
+ typedef ret_type (*fn_name##_impl_fntype)signature; \
99
+ NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
100
+ NVTX_SET_NAME_MANGLING_OPTIONS \
101
+ intptr_t* pSlot = &NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots)[NVTX3EXT_CBID_##fn_name + 1]; \
102
+ intptr_t slot = *pSlot; \
103
+ if (slot != NVTX_EXTENSION_DISABLED) { \
104
+ if (slot != NVTX_EXTENSION_FRESH) { \
105
+ NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
106
+ } else { \
107
+ NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemInitOnce)(); \
108
+ /* Re-read function slot after extension initialization. */ \
109
+ slot = *pSlot; \
110
+ if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \
111
+ NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
112
+ } \
113
+ } \
114
+ } \
115
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) /* No tool attached. */ \
116
+ }
117
+
118
+ #endif /* NVTX_DISABLE */
119
+
120
+ /* Non-void functions. */
121
+ #define NVTX_EXT_FN_RETURN return
122
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_NULLPTR;
123
+
124
+ NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemHeapHandle_t, nvtxMemHeapRegister, (nvtxDomainHandle_t domain, nvtxMemHeapDesc_t const* desc), (domain, desc))
125
+
126
+ NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemPermissionsHandle_t, nvtxMemPermissionsCreate, (nvtxDomainHandle_t domain, int32_t creationflags), (domain, creationflags))
127
+
128
+ #undef NVTX_EXT_FN_RETURN
129
+ #undef NVTX_EXT_FN_RETURN_INVALID
130
+ /* END: Non-void functions. */
131
+
132
+ /* void functions. */
133
+ #define NVTX_EXT_FN_RETURN
134
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype)
135
+
136
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemHeapUnregister, (nvtxDomainHandle_t domain, nvtxMemHeapHandle_t heap), (domain, heap))
137
+
138
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemHeapReset, (nvtxDomainHandle_t domain, nvtxMemHeapHandle_t heap), (domain, heap))
139
+
140
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsRegister, (nvtxDomainHandle_t domain, nvtxMemRegionsRegisterBatch_t const* desc), (domain, desc))
141
+
142
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsResize, (nvtxDomainHandle_t domain, nvtxMemRegionsResizeBatch_t const* desc), (domain, desc))
143
+
144
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsUnregister, (nvtxDomainHandle_t domain, nvtxMemRegionsUnregisterBatch_t const* desc), (domain, desc))
145
+
146
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsName, (nvtxDomainHandle_t domain, nvtxMemRegionsNameBatch_t const* desc), (domain, desc))
147
+
148
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsAssign, (nvtxDomainHandle_t domain, nvtxMemPermissionsAssignBatch_t const* desc), (domain, desc))
149
+
150
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsDestroy, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions), (domain, permissions))
151
+
152
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsReset, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions), (domain, permissions))
153
+
154
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsBind, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions, uint32_t bindScope, uint32_t bindFlags), (domain, permissions, bindScope, bindFlags))
155
+
156
+ NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsUnbind, (nvtxDomainHandle_t domain, uint32_t bindScope), (domain, bindScope))
157
+
158
+ #undef NVTX_EXT_FN_RETURN
159
+ #undef NVTX_EXT_FN_RETURN_INVALID
160
+ /* END: void functions. */
161
+
162
+ /* Keep NVTX_EXT_MEM_IMPL_FN_V1 defined for a future version of this extension. */
163
+
164
+ #ifdef __cplusplus
165
+ } /* extern "C" */
166
+ #endif /* __cplusplus */
167
+
168
+ #endif /* NVTX_EXT_IMPL_MEM_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplPayload_v1.h ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_IMPL_PAYLOAD_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtPayload.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #define NVTX_EXT_IMPL_GUARD
36
+ #include "nvtxExtImpl.h"
37
+ #undef NVTX_EXT_IMPL_GUARD
38
+
39
+ #ifndef NVTX_EXT_IMPL_PAYLOAD_V1
40
+ #define NVTX_EXT_IMPL_PAYLOAD_V1
41
+
42
+ #ifdef __cplusplus
43
+ extern "C" {
44
+ #endif /* __cplusplus */
45
+
46
+ #ifdef NVTX_DISABLE
47
+
48
+ #include "nvtxExtHelperMacros.h"
49
+
50
+ #define NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
51
+ NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
52
+ NVTX_SET_NAME_MANGLING_OPTIONS \
53
+ NVTX_EXT_HELPER_UNUSED_ARGS arg_names \
54
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) \
55
+ }
56
+
57
+ #define NVTX_EXT_PAYLOAD_IMPL_FN_NOARGS_V1(ret_type, fn_name) \
58
+ NVTX_DECLSPEC ret_type NVTX_API fn_name (void) { \
59
+ NVTX_SET_NAME_MANGLING_OPTIONS \
60
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) \
61
+ }
62
+
63
+ #else /* NVTX_DISABLE */
64
+
65
+ #include "nvtxExtPayloadTypeInfo.h"
66
+
67
+ /*
68
+ * Function slots for the payload extension. First entry is the module state,
69
+ * initialized to `0` (`NVTX_EXTENSION_FRESH`).
70
+ */
71
+ #define NVTX_EXT_PAYLOAD_SLOT_COUNT 63
72
+
73
+ NVTX_LINKONCE_DEFINE_GLOBAL intptr_t
74
+ NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX_EXT_PAYLOAD_SLOT_COUNT + 1]
75
+ = {0};
76
+
77
+ /* `NVTX_LINKONCE_FWDDECL_FUNCTION` is used to avoid warnings about missing prototypes. */
78
+
79
+ /* This helper always returns `1` as `uint8_t`. */
80
+ NVTX_LINKONCE_FWDDECL_FUNCTION uint8_t NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxReturnOne)(void);
81
+ NVTX_LINKONCE_DEFINE_FUNCTION uint8_t NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxReturnOne)(void)
82
+ {
83
+ return NVTX_STATIC_CAST(uint8_t, 1);
84
+ }
85
+
86
+ /*
87
+ * If a tool is attached, but does not handle `nvtxDomainIsEnabled`, the latter
88
+ * will always return `1` (enabled).
89
+ */
90
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxInitIsDomainEnabledFn)(void);
91
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxInitIsDomainEnabledFn)(void)
92
+ {
93
+ intptr_t* pSlot = &NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_nvtxDomainIsEnabled + 1];
94
+
95
+ /* The initialization disables all slots that have not been set by the tool. */
96
+ if (*pSlot == NVTX_EXTENSION_DISABLED)
97
+ {
98
+ intptr_t* moduleState = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots);
99
+ int isInitFnSet =
100
+ NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr != NVTX_NULLPTR;
101
+
102
+ /* Make `nvtxDomainIsEnabled` return `1`, if the tool does not provide an extension
103
+ initialization function or if the tool does not handle `nvtxDomainIsEnabled`. */
104
+ if (*moduleState == NVTX_EXTENSION_DISABLED ||
105
+ (isInitFnSet && *moduleState != NVTX_EXTENSION_INIT_FN_FAILED))
106
+ {
107
+ *pSlot = NVTX_REINTERPRET_CAST(intptr_t, NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxReturnOne));
108
+ }
109
+ }
110
+ }
111
+
112
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(void);
113
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(void)
114
+ {
115
+ intptr_t* fnSlots = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots) + 1;
116
+ nvtxExtModuleSegment_t segment = {
117
+ 0, /* unused (only one segment) */
118
+ NVTX_EXT_PAYLOAD_SLOT_COUNT,
119
+ NVTX_NULLPTR /* function slots */
120
+ };
121
+
122
+ nvtxExtModuleInfo_t module_info = {
123
+ NVTX_VERSION, sizeof(nvtxExtModuleInfo_t),
124
+ NVTX_EXT_PAYLOAD_MODULEID, NVTX_EXT_PAYLOAD_COMPATID,
125
+ 1, NVTX_NULLPTR, /* number of segments, segments */
126
+ NVTX_NULLPTR, /* no export function needed */
127
+ /* bake type sizes and alignment information into program binary */
128
+ &(NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadTypeInfo))
129
+ };
130
+
131
+ segment.functionSlots = fnSlots;
132
+ module_info.segments = &segment;
133
+
134
+ NVTX_INFO( "%s\n", __FUNCTION__ );
135
+
136
+ NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&module_info,
137
+ NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots));
138
+
139
+ NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxInitIsDomainEnabledFn)();
140
+ }
141
+
142
+ #define NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
143
+ typedef ret_type (*fn_name##_impl_fntype)signature; \
144
+ NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
145
+ NVTX_SET_NAME_MANGLING_OPTIONS \
146
+ intptr_t* pSlot = &NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_##fn_name + 1]; \
147
+ intptr_t slot = *pSlot; \
148
+ if (slot != NVTX_EXTENSION_DISABLED) { \
149
+ if (slot != NVTX_EXTENSION_FRESH) { \
150
+ NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
151
+ } else { \
152
+ NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(); \
153
+ /* Re-read function slot after extension initialization. */ \
154
+ slot = *pSlot; \
155
+ if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \
156
+ NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
157
+ } \
158
+ } \
159
+ } \
160
+ NVTX_EXT_FN_RETURN_INVALID(ret_type) /* No tool attached. */ \
161
+ }
162
+
163
+ #define NVTX_EXT_PAYLOAD_IMPL_FN_NOARGS_V1(ret_type, fn_name) \
164
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, (void), ())
165
+
166
+ #endif /* NVTX_DISABLE */
167
+
168
+ /* Push/pop functions return `NVTX_NO_PUSH_POP_TRACKING` if no tool is attached. */
169
+ #define NVTX_EXT_FN_RETURN return
170
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_NO_PUSH_POP_TRACKING;
171
+
172
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(int, nvtxRangePushPayload,
173
+ (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count),
174
+ (domain, payloadData, count))
175
+
176
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(int, nvtxRangePopPayload,
177
+ (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count),
178
+ (domain, payloadData, count))
179
+
180
+ #undef NVTX_EXT_FN_RETURN
181
+ #undef NVTX_EXT_FN_RETURN_INVALID
182
+
183
+ /* Non-void functions. */
184
+ #define NVTX_EXT_FN_RETURN return
185
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_STATIC_CAST(rtype, 0);
186
+
187
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxPayloadSchemaRegister,
188
+ (nvtxDomainHandle_t domain, const nvtxPayloadSchemaAttr_t* attr),
189
+ (domain, attr))
190
+
191
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxPayloadEnumRegister,
192
+ (nvtxDomainHandle_t domain, const nvtxPayloadEnumAttr_t* attr),
193
+ (domain, attr))
194
+
195
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(nvtxRangeId_t, nvtxRangeStartPayload,
196
+ (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count),
197
+ (domain, payloadData, count))
198
+
199
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint8_t, nvtxDomainIsEnabled, (nvtxDomainHandle_t domain), (domain))
200
+
201
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxScopeRegister, (nvtxDomainHandle_t domain,
202
+ const nvtxScopeAttr_t* attr), (domain, attr))
203
+
204
+ NVTX_EXT_PAYLOAD_IMPL_FN_NOARGS_V1(int64_t, nvtxTimestampGet)
205
+
206
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxTimeDomainRegister,
207
+ (nvtxDomainHandle_t domain, const nvtxTimeDomainAttr_t* attr),
208
+ (domain, attr))
209
+
210
+ #undef NVTX_EXT_FN_RETURN
211
+ #undef NVTX_EXT_FN_RETURN_INVALID
212
+ /* END: Non-void functions. */
213
+
214
+ /* void functions. */
215
+ #define NVTX_EXT_FN_RETURN
216
+ #define NVTX_EXT_FN_RETURN_INVALID(rtype)
217
+
218
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxMarkPayload, (nvtxDomainHandle_t domain,
219
+ const nvtxPayloadData_t* payloadData, size_t count), (domain, payloadData, count))
220
+
221
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxRangeEndPayload, (nvtxDomainHandle_t domain,
222
+ nvtxRangeId_t id, const nvtxPayloadData_t* payloadData, size_t count),
223
+ (domain, id, payloadData, count))
224
+
225
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimerSource,
226
+ (nvtxDomainHandle_t domain, uint64_t timeDomainId, uint64_t flags, int64_t (*timestampProviderFn)(void)),
227
+ (domain, timeDomainId, flags, timestampProviderFn))
228
+
229
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimerSourceWithData,
230
+ (nvtxDomainHandle_t domain, uint64_t timeDomainId, uint64_t flags, int64_t (*timestampProviderFn)(void* data), void* data),
231
+ (domain, timeDomainId, flags, timestampProviderFn, data))
232
+
233
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimeSyncPoint,
234
+ (nvtxDomainHandle_t domain, uint64_t timeDomainId1, uint64_t timeDomainId2,
235
+ int64_t timestamp1, int64_t timestamp2),
236
+ (domain, timeDomainId1, timeDomainId2, timestamp1, timestamp2))
237
+
238
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimeSyncPointTable,
239
+ (nvtxDomainHandle_t domain, uint64_t timeDomainIdSrc, uint64_t timeDomainIdDst,
240
+ const nvtxSyncPoint_t* syncPoints, size_t count),
241
+ (domain, timeDomainIdSrc, timeDomainIdDst, syncPoints, count))
242
+
243
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimestampConversionFactor,
244
+ (nvtxDomainHandle_t domain, uint64_t timeDomainIdSrc, uint64_t timeDomainIdDst,
245
+ double slope, int64_t timestampSrc, int64_t timestampDst),
246
+ (domain, timeDomainIdSrc, timeDomainIdDst, slope, timestampSrc, timestampDst))
247
+
248
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxEventSubmit,
249
+ (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t numPayloads),
250
+ (domain, payloadData, numPayloads))
251
+
252
+ NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxEventBatchSubmit, (nvtxDomainHandle_t domain,
253
+ const nvtxEventBatch_t* eventBatch), (domain, eventBatch))
254
+
255
+ #undef NVTX_EXT_FN_RETURN
256
+ #undef NVTX_EXT_FN_RETURN_INVALID
257
+ /* END: void functions. */
258
+
259
+ /* Keep NVTX_EXT_PAYLOAD_IMPL_FN_V1 defined for a future version of this extension. */
260
+
261
+ #ifdef __cplusplus
262
+ } /* extern "C" */
263
+ #endif /* __cplusplus */
264
+
265
+ #endif /* NVTX_EXT_IMPL_PAYLOAD_V1 */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtInit.h ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_INIT_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #ifdef __cplusplus
36
+ extern "C" {
37
+ #endif /* __cplusplus */
38
+
39
+ /* ---- Platform-independent helper definitions and functions ---- */
40
+
41
+ /* Prefer macros over inline functions to reduce symbol resolution at link time */
42
+
43
+ #if defined(_WIN32)
44
+ #define NVTX_ATOMIC_WRITE_PTR(address, value) \
45
+ InterlockedExchangePointer(NVTX_REINTERPRET_CAST(volatile PVOID*, (address)), \
46
+ NVTX_REINTERPRET_CAST(PVOID, (value)))
47
+ #define NVTX_ATOMIC_CAS_PTR(old, address, exchange, comparand) \
48
+ (old) = NVTX_REINTERPRET_CAST(intptr_t, InterlockedCompareExchangePointer( \
49
+ NVTX_REINTERPRET_CAST(volatile PVOID*, (address)), \
50
+ NVTX_REINTERPRET_CAST(PVOID, (exchange)), \
51
+ NVTX_REINTERPRET_CAST(PVOID, (comparand))))
52
+ #elif defined(__GNUC__)
53
+ /* Ensure full memory barrier for atomics, to match Windows functions */
54
+ #define NVTX_ATOMIC_WRITE_PTR(address, value) \
55
+ __sync_synchronize(); *address = value; __sync_synchronize()
56
+ #define NVTX_ATOMIC_CAS_PTR(old, address, exchange, comparand) \
57
+ old = __sync_val_compare_and_swap(address, comparand, exchange)
58
+ #else
59
+ #error The library does not support your configuration!
60
+ #endif
61
+
62
+ #ifndef NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
63
+ /* Define this to 1 for platforms where pre-injected libraries can be discovered. */
64
+ #if defined(_WIN32)
65
+ /* Windows has no process-wide table of dynamic library symbols, so this can't be supported. */
66
+ #define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
67
+ #else
68
+ /* POSIX platforms allow calling dlsym on a null module to use the process-wide table.
69
+ * Note: Still disabled in load sequence version 2. Needs to support following the
70
+ * RTLD_NEXT chain, and needs more testing before support can be enabled by default. */
71
+ #define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
72
+ #endif
73
+ #endif
74
+
75
+ #ifndef NVTX_SUPPORT_ENV_VARS
76
+ /* Define this to 1 for platforms that support environment variables. */
77
+ /* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */
78
+ /* Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
79
+ #define NVTX_SUPPORT_ENV_VARS 1
80
+ #endif
81
+
82
+ #ifndef NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
83
+ /* Define this to 1 for platforms that support dynamic/shared libraries */
84
+ #define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
85
+ #endif
86
+
87
+ #ifndef NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
88
+ #if defined(__ANDROID__)
89
+ #define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 1
90
+ #else
91
+ #define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 0
92
+ #endif
93
+ #endif
94
+
95
+ #ifndef NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
96
+ /* On platforms that support weak symbols (i.e. non-Windows), injection libraries may
97
+ * be statically linked into an application. This is useful for platforms where dynamic
98
+ * injection is not available. Weak symbols not marked extern are definitions, not just
99
+ * declarations. They are guaranteed to be initialized to zero if no normal definitions
100
+ * are found by the linker to override them. This means the NVTX load sequence can safely
101
+ * detect the presence of a static injection -- if InitializeInjectionNvtxExtension_fnptr is zero,
102
+ * there is no static injection. */
103
+ #if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
104
+ #define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
105
+ #else
106
+ #define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
107
+ #endif
108
+ #endif
109
+
110
+ #if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY && !defined(NVTX_STATIC_INJECTION_IMPL)
111
+ /* To make an NVTX injection library support static injection, it must do these things:
112
+ * - Define InitializeInjectionNvtxExtension_fnptr as a normal symbol (not weak), pointing to
113
+ * the implementation of InitializeInjectionNvtxExtension (which does not need to be a
114
+ * dynamic export if only supporting static injection).
115
+ * - Define NVTX_STATIC_INJECTION_IMPL so the weak definition below is skipped.
116
+ * - Compile the static injection files with -fPIC if they are to be linked with other
117
+ * files compiled this way. If you forget this, GCC will simply tell you to add it.
118
+ * When building the application, there are a few ways to link in a static injection:
119
+ * - Compile the injection's source files normally, and include the .o files as inputs
120
+ * to the linker.
121
+ * - If the injection is provided as an archive (.a file), it will not resolve any
122
+ * unresolved symbols, so the linker will skip it by default. This can be fixed
123
+ * by wrapping the static injection's name on the linker command line with options
124
+ * to treat it differently. For example:
125
+ * gcc example.o libfoo.a -Wl,--whole-archive libinj-static.a -Wl,--no-whole-archive libbar.a
126
+ * Note that libinj-static.a is bracketed by options to turn on "whole archive" and
127
+ * then back off again afterwards, so libfoo.a and libbar.a are linked normally.
128
+ * - In CMake, a static injection can be added with options like this:
129
+ * target_link_libraries(app PRIVATE -Wl,--whole-archive inj-static -Wl,--no-whole-archive)
130
+ */
131
+ __attribute__((weak)) NvtxExtInitializeInjectionFunc_t InitializeInjectionNvtxExtension_fnptr;
132
+ #endif
133
+
134
+ /* This function tries to find or load an NVTX injection library and get the
135
+ * address of its InitializeInjectionNvtxExtension function. If such a function pointer
136
+ * is found, it is called, and passed the address of this NVTX instance's
137
+ * nvtxGetExportTable function, so the injection can attach to this instance.
138
+ * If the initialization fails for any reason, any dynamic library loaded will
139
+ * be freed, and all NVTX implementation functions will be set to no-ops. If
140
+ * initialization succeeds, NVTX functions not attached to the tool will be set
141
+ * to no-ops. This is implemented as one function instead of several small
142
+ * functions to minimize the number of weak symbols the linker must resolve.
143
+ * Order of search is:
144
+ * - Pre-injected library exporting InitializeInjectionNvtxExtension
145
+ * - Loadable library exporting InitializeInjectionNvtxExtension
146
+ * - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
147
+ * - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
148
+ * - Statically-linked injection library defining InitializeInjectionNvtxExtension_fnptr
149
+ */
150
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(
151
+ NvtxExtInitializeInjectionFunc_t* out_init_fnptr);
152
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(
153
+ NvtxExtInitializeInjectionFunc_t* out_init_fnptr)
154
+ {
155
+ static const char initFuncName[] = "InitializeInjectionNvtxExtension";
156
+ #if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
157
+ static const char initFuncPreinjectName[] = "InitializeInjectionNvtxExtensionPreinject";
158
+ #endif
159
+ NvtxExtInitializeInjectionFunc_t init_fnptr = NVTX_NULLPTR;
160
+ NVTX_DLLHANDLE injectionLibraryHandle = NVTX_DLLDEFAULT;
161
+
162
+ if (out_init_fnptr)
163
+ {
164
+ *out_init_fnptr = NVTX_NULLPTR;
165
+ }
166
+
167
+ #if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
168
+ /* Try discovering dynamic injection library to load */
169
+ {
170
+ #if NVTX_SUPPORT_ENV_VARS
171
+ /* If env var NVTX_INJECTION64_PATH is set, it should contain the path
172
+ to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
173
+ const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
174
+ ? NVTX_STR("NVTX_INJECTION32_PATH")
175
+ : NVTX_STR("NVTX_INJECTION64_PATH");
176
+ #endif /* NVTX_SUPPORT_ENV_VARS */
177
+ NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
178
+ const NVTX_PATHCHAR* injectionLibraryPath = NVTX_NULLPTR;
179
+
180
+ /* Refer to this variable explicitly in case all references to it are #if'ed out. */
181
+ (void)injectionLibraryPathBuf;
182
+
183
+ #if NVTX_SUPPORT_ENV_VARS
184
+ /* Disable the warning for getenv & _wgetenv -- this usage is safe because
185
+ these functions are not called again before using the returned value. */
186
+ #if defined(_MSC_VER)
187
+ #pragma warning( push )
188
+ #pragma warning( disable : 4996 )
189
+ #endif
190
+ injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
191
+ #if defined(_MSC_VER)
192
+ #pragma warning( pop )
193
+ #endif
194
+ #endif
195
+
196
+ #if NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
197
+ if (!injectionLibraryPath)
198
+ {
199
+ const char *bits = (sizeof(void*) == 4) ? "32" : "64";
200
+ char cmdlineBuf[32];
201
+ char pkgName[PATH_MAX];
202
+ int count;
203
+ int pid;
204
+ FILE *fp;
205
+ size_t bytesRead;
206
+ size_t pos;
207
+
208
+ pid = NVTX_STATIC_CAST(int, getpid());
209
+ count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
210
+ if (count <= 0 || count >= NVTX_STATIC_CAST(int, sizeof(cmdlineBuf)))
211
+ {
212
+ NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
213
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
214
+ }
215
+
216
+ fp = fopen(cmdlineBuf, "r");
217
+ if (!fp)
218
+ {
219
+ NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
220
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
221
+ }
222
+
223
+ bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
224
+ fclose(fp);
225
+ if (bytesRead == 0)
226
+ {
227
+ NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
228
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
229
+ }
230
+
231
+ pkgName[bytesRead] = 0;
232
+
233
+ /* String can contain colon as a process separator. In this case the
234
+ package name is before the colon. */
235
+ pos = 0;
236
+ while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
237
+ {
238
+ ++pos;
239
+ }
240
+ pkgName[pos] = 0;
241
+
242
+ count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
243
+ if (count <= 0 || count >= NVTX_BUFSIZE)
244
+ {
245
+ NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
246
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
247
+ }
248
+
249
+ /* On Android, verify path is accessible due to aggressive file access restrictions. */
250
+ /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
251
+ /* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
252
+ if (injectionLibraryPathBuf[0] == '/')
253
+ {
254
+ #if (__ANDROID_API__ < 21)
255
+ int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
256
+ #else
257
+ int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
258
+ #endif
259
+ if (access_err != 0)
260
+ {
261
+ NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
262
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
263
+ }
264
+ }
265
+ injectionLibraryPath = injectionLibraryPathBuf;
266
+ }
267
+ #endif /* NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE */
268
+
269
+ /* At this point, `injectionLibraryPath` is specified if a dynamic
270
+ injection library was specified by a tool. */
271
+ if (injectionLibraryPath)
272
+ {
273
+ /* Load the injection library */
274
+ injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
275
+ if (!injectionLibraryHandle)
276
+ {
277
+ NVTX_ERR("Failed to load injection library\n");
278
+ return NVTX_ERR_INIT_LOAD_LIBRARY;
279
+ }
280
+ else
281
+ {
282
+ /* Attempt to get the injection library's entry-point. */
283
+ init_fnptr = NVTX_REINTERPRET_CAST(NvtxExtInitializeInjectionFunc_t, NVTX_DLLFUNC(injectionLibraryHandle, initFuncName));
284
+ if (!init_fnptr)
285
+ {
286
+ NVTX_DLLCLOSE(injectionLibraryHandle);
287
+ NVTX_ERR("Failed to get address of function %s from injection library\n", initFuncName);
288
+ return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
289
+ }
290
+ }
291
+ }
292
+ }
293
+ #endif /* NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY */
294
+
295
+ #if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
296
+ if (!init_fnptr)
297
+ {
298
+ /* Use POSIX global symbol chain to query for init function from any module */
299
+ init_fnptr = NVTX_REINTERPRET_CAST(NvtxExtInitializeInjectionFunc_t, NVTX_DLLFUNC(NVTX_DLLDEFAULT, initFuncPreinjectName));
300
+ }
301
+ #endif
302
+
303
+ #if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
304
+ if (!init_fnptr)
305
+ {
306
+ /* Check weakly-defined function pointer. A statically-linked injection can define this
307
+ * as a normal symbol and set it to the address of the NVTX init function -- this will
308
+ * provide a non-null value here. If there is no other definition of this symbol, it
309
+ * will be null here. */
310
+ if (InitializeInjectionNvtxExtension_fnptr)
311
+ {
312
+ init_fnptr = InitializeInjectionNvtxExtension_fnptr;
313
+ }
314
+ }
315
+ #endif
316
+
317
+ if (out_init_fnptr)
318
+ {
319
+ *out_init_fnptr = init_fnptr;
320
+ }
321
+
322
+ /* At this point, if `init_fnptr` is not set, no tool has specified an NVTX injection library.
323
+ Non-success result is returned, so that all NVTX API functions will be set to no-ops. */
324
+ if (!init_fnptr)
325
+ {
326
+ return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
327
+ }
328
+
329
+ return NVTX_SUCCESS;
330
+ }
331
+
332
+ /* Avoid warnings about missing prototypes. */
333
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce) (
334
+ nvtxExtModuleInfo_t* moduleInfo, intptr_t* moduleState);
335
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce) (
336
+ nvtxExtModuleInfo_t* moduleInfo, intptr_t* moduleState)
337
+ {
338
+ intptr_t old;
339
+
340
+ NVTX_INFO( "%s\n", __FUNCTION__ );
341
+
342
+ if (*moduleState == NVTX_EXTENSION_LOADED ||
343
+ *moduleState == NVTX_EXTENSION_DISABLED ||
344
+ *moduleState == NVTX_EXTENSION_INIT_FN_FAILED)
345
+ {
346
+ NVTX_INFO("Module loaded\n");
347
+ return;
348
+ }
349
+
350
+ NVTX_ATOMIC_CAS_PTR(
351
+ old,
352
+ moduleState,
353
+ NVTX_EXTENSION_STARTING,
354
+ NVTX_EXTENSION_FRESH);
355
+ if (old == NVTX_EXTENSION_FRESH)
356
+ {
357
+ intptr_t stateReturnValue = NVTX_EXTENSION_LOADED;
358
+ NvtxExtInitializeInjectionFunc_t init_fnptr =
359
+ NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr;
360
+ int entryPointStatus = 0;
361
+ int forceAllToNoops = 0;
362
+ size_t s;
363
+
364
+ /* Load and initialize injection library, which will assign the function pointers. */
365
+ if (init_fnptr == NVTX_NULLPTR)
366
+ {
367
+ int result = 0;
368
+
369
+ /* Try to load vanilla NVTX first. */
370
+ nvtxInitialize(NVTX_NULLPTR);
371
+
372
+ result = NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(&init_fnptr);
373
+ /* At this point `init_fnptr` will be either 0 or a real function. */
374
+
375
+ if (result == NVTX_SUCCESS)
376
+ {
377
+ NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr = init_fnptr;
378
+ }
379
+ else
380
+ {
381
+ if (result == NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT)
382
+ {
383
+ stateReturnValue = NVTX_EXTENSION_DISABLED;
384
+ }
385
+ NVTX_ERR("Failed to load injection library.\n");
386
+ }
387
+ }
388
+
389
+ if (init_fnptr != NVTX_NULLPTR)
390
+ {
391
+ /* Invoke injection library's initialization function. If it returns
392
+ 0 (failure), the module state is set to NVTX_EXTENSION_INIT_FN_FAILED. */
393
+ entryPointStatus = init_fnptr(moduleInfo);
394
+ if (entryPointStatus == 0)
395
+ {
396
+ stateReturnValue = NVTX_EXTENSION_INIT_FN_FAILED;
397
+ NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
398
+ }
399
+ }
400
+
401
+ /* Clean up any functions that are still uninitialized so that they are
402
+ skipped. Set all to null if injection init function failed as well. */
403
+ forceAllToNoops = (init_fnptr == NVTX_NULLPTR) || (entryPointStatus == 0);
404
+ for (s = 0; s < moduleInfo->segmentsCount; ++s)
405
+ {
406
+ nvtxExtModuleSegment_t* segment = moduleInfo->segments + s;
407
+ size_t i;
408
+ for (i = 0; i < segment->slotCount; ++i)
409
+ {
410
+ if (forceAllToNoops || (segment->functionSlots[i] == NVTX_EXTENSION_FRESH))
411
+ {
412
+ segment->functionSlots[i] = NVTX_EXTENSION_DISABLED;
413
+ }
414
+ }
415
+ }
416
+
417
+ NVTX_MEMBAR();
418
+
419
+ /* Signal that initialization has finished and the function pointers are set. */
420
+ NVTX_ATOMIC_WRITE_PTR(moduleState, stateReturnValue);
421
+ }
422
+ else /* Spin-wait until initialization has finished. */
423
+ {
424
+ NVTX_MEMBAR();
425
+ while (*moduleState != NVTX_EXTENSION_LOADED &&
426
+ *moduleState != NVTX_EXTENSION_DISABLED &&
427
+ *moduleState != NVTX_EXTENSION_INIT_FN_FAILED)
428
+ {
429
+ NVTX_YIELD();
430
+ NVTX_MEMBAR();
431
+ }
432
+ }
433
+ }
434
+
435
+ #ifdef __cplusplus
436
+ }
437
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadHelperInternal.h ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #ifndef NVTX_EXT_PAYLOAD_HELPER_INTERNAL_H
32
+ #define NVTX_EXT_PAYLOAD_HELPER_INTERNAL_H
33
+
34
+ /* General helper macros */
35
+ #include "nvtxExtHelperMacros.h"
36
+
37
+ /* Get variable name with line number (almost unique per file). */
38
+ #define _NVTX_PAYLOAD_DATA_VAR NVTX_EXT_CONCAT(nvtxDFDB,__LINE__)
39
+
40
+ /* Create real arguments from just pasting tokens next to each other. */
41
+ #define _NVTX_PAYLOAD_PASS_THROUGH(...) __VA_ARGS__
42
+
43
+ /* Avoid prefixing `NVTX_PAYLOAD_ENTRY_` for nested payloads. */
44
+ #define NVTX_PAYLOAD_ENTRY_THROWAWAY
45
+ #define _NVTX_PAYLOAD_NESTED(id) THROWAWAY id
46
+
47
+ /*
48
+ * Create the NVTX binary payloads schema attributes.
49
+ *
50
+ * @param struct_id The name of the struct.
51
+ * @param schema_name The name of the schema.
52
+ * @param schema_flags Additional schema flags
53
+ * @param mask_add Fields to be added to the mask.
54
+ * @param num_entries The number of schema entries.
55
+ */
56
+ #define NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, schema_id, mask_add, num_entries) \
57
+ nvtxPayloadSchemaAttr_t struct_id##Attr = { \
58
+ /*.fieldMask = */NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE | mask_add \
59
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES | \
60
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES | \
61
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_STATIC_SIZE, \
62
+ /*.name = */schema_name, \
63
+ /*.type = */NVTX_PAYLOAD_SCHEMA_TYPE_STATIC, \
64
+ /*.flags = */schema_flags, \
65
+ /*.entries = */struct_id##Schema, /*.numEntries = */num_entries, \
66
+ /*.payloadStaticSize = */sizeof(struct_id), \
67
+ /*.packAlign = */0, /*.schemaId = */schema_id};
68
+
69
+
70
+ /*****************************************************************/
71
+ /*** Helper for `NVTX_DEFINE_SCHEMA_FOR_STRUCT[_AND_REGISTER]` ***/
72
+
73
+ /* First part of schema entry for different number of arguments. */
74
+ #define _NVTX_PAYLOAD_SCHEMA_EF2(member, etype) \
75
+ 0, NVTX_PAYLOAD_ENTRY_##etype, NVTX_NULLPTR, NVTX_NULLPTR, 0,
76
+ #define _NVTX_PAYLOAD_SCHEMA_EF3(member, etype, name) \
77
+ 0, NVTX_PAYLOAD_ENTRY_##etype, name, NVTX_NULLPTR, 0,
78
+ #define _NVTX_PAYLOAD_SCHEMA_EF4(member, etype, name, desc) \
79
+ 0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, 0,
80
+ #define _NVTX_PAYLOAD_SCHEMA_EF5(member, etype, name, desc, arraylen) \
81
+ 0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
82
+ #define _NVTX_PAYLOAD_SCHEMA_EF6(member, etype, name, desc, arraylen, flags) \
83
+ NVTX_PAYLOAD_ENTRY_FLAG_##flags, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
84
+
85
+ #define _NVTX_PAYLOAD_SCHEMA_ENTRY_FRONT(...) \
86
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_SCHEMA_EF, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
87
+
88
+ /* Second part of schema entry (append struct member).
89
+ (At least two arguments are passed: `member` and `etype`.) */
90
+ #define _NVTX_PAYLOAD_SCHEMA_ENTRY_END(member, ...) member
91
+
92
+ /* Resolve to schema entry. `entry` is `(member, etype, ...)`. */
93
+ #define _NVTX_PAYLOAD_SCHEMA_ENTRY(struct_id, entry) \
94
+ {_NVTX_PAYLOAD_SCHEMA_ENTRY_FRONT entry \
95
+ offsetof(struct_id, _NVTX_PAYLOAD_SCHEMA_ENTRY_END entry)},
96
+
97
+ /* Handle up to 16 schema entries. */
98
+ #define _NVTX_PAYLOAD_SME1(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1)
99
+ #define _NVTX_PAYLOAD_SME2(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME1(s,__VA_ARGS__)
100
+ #define _NVTX_PAYLOAD_SME3(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME2(s,__VA_ARGS__)
101
+ #define _NVTX_PAYLOAD_SME4(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME3(s,__VA_ARGS__)
102
+ #define _NVTX_PAYLOAD_SME5(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME4(s,__VA_ARGS__)
103
+ #define _NVTX_PAYLOAD_SME6(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME5(s,__VA_ARGS__)
104
+ #define _NVTX_PAYLOAD_SME7(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME6(s,__VA_ARGS__)
105
+ #define _NVTX_PAYLOAD_SME8(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME7(s,__VA_ARGS__)
106
+ #define _NVTX_PAYLOAD_SME9(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME8(s,__VA_ARGS__)
107
+ #define _NVTX_PAYLOAD_SME10(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME9(s,__VA_ARGS__)
108
+ #define _NVTX_PAYLOAD_SME11(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME10(s,__VA_ARGS__)
109
+ #define _NVTX_PAYLOAD_SME12(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME11(s,__VA_ARGS__)
110
+ #define _NVTX_PAYLOAD_SME13(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME12(s,__VA_ARGS__)
111
+ #define _NVTX_PAYLOAD_SME14(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME13(s,__VA_ARGS__)
112
+ #define _NVTX_PAYLOAD_SME15(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME14(s,__VA_ARGS__)
113
+ #define _NVTX_PAYLOAD_SME16(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME15(s,__VA_ARGS__)
114
+
115
+ #define _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, ...) \
116
+ nvtxPayloadSchemaEntry_t struct_id##Schema[] = { \
117
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_SME, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(struct_id, __VA_ARGS__) \
118
+ {0, 0} \
119
+ };
120
+
121
+ /*
122
+ * Handle optional parameters for `NVTX_DEFINE_SCHEMA_FOR_STRUCT[_AND_REGISTER]`.
123
+ */
124
+ #define _NVTX_DEFINE_S4S_6(struct_id, schema_name, prefix, schema_flags, schema_id, entries) \
125
+ prefix _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
126
+ prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, schema_id, \
127
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_SCHEMA_ID |,\
128
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
129
+ #define _NVTX_DEFINE_S4S_5(struct_id, schema_name, prefix, schema_flags, entries) \
130
+ prefix _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
131
+ prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, 0, \
132
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS |, \
133
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
134
+ #define _NVTX_DEFINE_S4S_4(struct_id, schema_name, prefix, entries) \
135
+ prefix _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
136
+ prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, \
137
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME |, \
138
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
139
+ #define _NVTX_DEFINE_S4S_3(struct_id, schema_name, entries) \
140
+ _NVTX_DEFINE_S4S_4(struct_id, schema_name, /*prefix*/, entries)
141
+ #define _NVTX_DEFINE_S4S_2(struct_id, entries) \
142
+ _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
143
+ NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, NVTX_NULLPTR, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, ,\
144
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
145
+
146
+ #define _NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, ...) \
147
+ NVTX_EXT_CONCAT(_NVTX_DEFINE_S4S_, \
148
+ NVTX_EXT_NUM_ARGS(struct_id, __VA_ARGS__))(struct_id, __VA_ARGS__)
149
+
150
+ /*** END: Helper for `NVTX_DEFINE_SCHEMA_FOR_STRUCT[_AND_REGISTER]` ***/
151
+
152
+
153
+ /******************************************************************/
154
+ /*** Helper for `NVTX_DEFINE_STRUCT_WITH_SCHEMA[_AND_REGISTER]` ***/
155
+
156
+ /* Extract struct member for fixed-size arrays. */
157
+ #define _NVTX_PAYLOAD_STRUCT_ARR_MEM1(name) name
158
+ #define _NVTX_PAYLOAD_STRUCT_ARR_MEM2(name, count) name[count]
159
+
160
+ /* Extract type and member name and handle special case of fixed-size array. */
161
+ #define _NVTX_PAYLOAD_STRUCT_E2(type, member) type member;
162
+ #define _NVTX_PAYLOAD_STRUCT_E3(type, member, etype) type member;
163
+ #define _NVTX_PAYLOAD_STRUCT_E4(type, member, etype, name) type member;
164
+ #define _NVTX_PAYLOAD_STRUCT_E5(type, member, etype, name, desc) type member;
165
+ #define _NVTX_PAYLOAD_STRUCT_E6(type, member, etype, name, desc, arraylen) \
166
+ type NVTX_EXT_CONCAT(_NVTX_PAYLOAD_STRUCT_ARR_MEM, NVTX_EXT_NUM_ARGS member) member;
167
+ #define _NVTX_PAYLOAD_STRUCT_E7(type, member, etype, name, desc, arraylen, flags) \
168
+ _NVTX_PAYLOAD_STRUCT_E6(type, member, etype, name, desc, arraylen)
169
+
170
+ /* Handle different number of arguments per struct entry. */
171
+ #define _NVTX_PAYLOAD_STRUCT_ENTRY_(...) \
172
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_STRUCT_E, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
173
+
174
+ /* Handle up to 16 struct members. */
175
+ #define _NVTX_PAYLOAD_STRUCT_ENTRY(entry) _NVTX_PAYLOAD_STRUCT_ENTRY_ entry
176
+ #define _NVTX_PAYLOAD_STRUCT1(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1)
177
+ #define _NVTX_PAYLOAD_STRUCT2(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT1(__VA_ARGS__)
178
+ #define _NVTX_PAYLOAD_STRUCT3(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT2(__VA_ARGS__)
179
+ #define _NVTX_PAYLOAD_STRUCT4(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT3(__VA_ARGS__)
180
+ #define _NVTX_PAYLOAD_STRUCT5(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT4(__VA_ARGS__)
181
+ #define _NVTX_PAYLOAD_STRUCT6(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT5(__VA_ARGS__)
182
+ #define _NVTX_PAYLOAD_STRUCT7(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT6(__VA_ARGS__)
183
+ #define _NVTX_PAYLOAD_STRUCT8(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT7(__VA_ARGS__)
184
+ #define _NVTX_PAYLOAD_STRUCT9(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT8(__VA_ARGS__)
185
+ #define _NVTX_PAYLOAD_STRUCT10(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT9(__VA_ARGS__)
186
+ #define _NVTX_PAYLOAD_STRUCT11(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT10(__VA_ARGS__)
187
+ #define _NVTX_PAYLOAD_STRUCT12(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT11(__VA_ARGS__)
188
+ #define _NVTX_PAYLOAD_STRUCT13(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT12(__VA_ARGS__)
189
+ #define _NVTX_PAYLOAD_STRUCT14(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT13(__VA_ARGS__)
190
+ #define _NVTX_PAYLOAD_STRUCT15(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT14(__VA_ARGS__)
191
+ #define _NVTX_PAYLOAD_STRUCT16(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT15(__VA_ARGS__)
192
+
193
+ /* Generate the typedef. */
194
+ #define _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, ...) \
195
+ typedef struct { \
196
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_STRUCT, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__) \
197
+ } struct_id;
198
+
199
+ /* Generate first part of the schema entry. */
200
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_N3(type, memberId, etype) \
201
+ 0, NVTX_PAYLOAD_ENTRY_##etype, NVTX_NULLPTR, NVTX_NULLPTR, 0,
202
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_N4(type, memberId, etype, name) \
203
+ 0, NVTX_PAYLOAD_ENTRY_##etype, name, NVTX_NULLPTR, 0,
204
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_N5(type, memberId, etype, name, desc) \
205
+ 0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, 0,
206
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_N6(type, memberId, etype, name, desc, arraylen) \
207
+ 0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
208
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_N7(type, memberId, etype, name, desc, arraylen, flags) \
209
+ NVTX_PAYLOAD_ENTRY_FLAG_##flags, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
210
+
211
+ #define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_FRONT(...) \
212
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_INIT_SCHEMA_N, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
213
+
214
+ #define _NVTX_PAYLOAD_ARRAY_MEMBER1(name) name
215
+ #define _NVTX_PAYLOAD_ARRAY_MEMBER2(name, count) name
216
+
217
+ /* Resolve to last part of schema entry (append struct member). */
218
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_NX3(type, memberId, ...) memberId
219
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_NX4(type, memberId, ...) memberId
220
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_NX5(type, memberId, ...) memberId
221
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_NX6(type, memberId, ...) \
222
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_ARRAY_MEMBER, NVTX_EXT_NUM_ARGS memberId) memberId
223
+ #define _NVTX_PAYLOAD_INIT_SCHEMA_NX7(type, memberId, ...) \
224
+ _NVTX_PAYLOAD_INIT_SCHEMA_NX6(type, memberId, __VA_ARGS__)
225
+
226
+ #define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_END(...) \
227
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_INIT_SCHEMA_NX, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
228
+
229
+ /* Resolve to schema entry. `entry` is `(type, member, etype, ...)`. */
230
+ #define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(struct_id, entry) \
231
+ {_NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_FRONT entry \
232
+ offsetof(struct_id, _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_END entry)},
233
+
234
+ /* Handle up to 16 schema entries. */
235
+ #define _NVTX_PAYLOAD_INIT_SME1(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1)
236
+ #define _NVTX_PAYLOAD_INIT_SME2(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME1(s, __VA_ARGS__)
237
+ #define _NVTX_PAYLOAD_INIT_SME3(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME2(s, __VA_ARGS__)
238
+ #define _NVTX_PAYLOAD_INIT_SME4(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME3(s, __VA_ARGS__)
239
+ #define _NVTX_PAYLOAD_INIT_SME5(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME4(s, __VA_ARGS__)
240
+ #define _NVTX_PAYLOAD_INIT_SME6(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME5(s, __VA_ARGS__)
241
+ #define _NVTX_PAYLOAD_INIT_SME7(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME6(s, __VA_ARGS__)
242
+ #define _NVTX_PAYLOAD_INIT_SME8(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME7(s, __VA_ARGS__)
243
+ #define _NVTX_PAYLOAD_INIT_SME9(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME8(s, __VA_ARGS__)
244
+ #define _NVTX_PAYLOAD_INIT_SME10(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME9(s, __VA_ARGS__)
245
+ #define _NVTX_PAYLOAD_INIT_SME11(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME10(s, __VA_ARGS__)
246
+ #define _NVTX_PAYLOAD_INIT_SME12(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME11(s, __VA_ARGS__)
247
+ #define _NVTX_PAYLOAD_INIT_SME13(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME12(s, __VA_ARGS__)
248
+ #define _NVTX_PAYLOAD_INIT_SME14(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME13(s, __VA_ARGS__)
249
+ #define _NVTX_PAYLOAD_INIT_SME15(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME14(s, __VA_ARGS__)
250
+ #define _NVTX_PAYLOAD_INIT_SME16(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME15(s, __VA_ARGS__)
251
+
252
+ #define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, ...) \
253
+ nvtxPayloadSchemaEntry_t struct_id##Schema[] = { \
254
+ NVTX_EXT_CONCAT(_NVTX_PAYLOAD_INIT_SME, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(struct_id, __VA_ARGS__) \
255
+ {0, 0} \
256
+ };
257
+
258
+ /*
259
+ * Handle optional parameters for `NVTX_DEFINE_STRUCT_WITH_SCHEMA[_AND_REGISTER]`.
260
+ */
261
+ #define _NVTX_DEFINE_SWS_6(struct_id, schema_name, prefix, schema_flags, schema_id, entries) \
262
+ _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
263
+ prefix _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
264
+ prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, schema_id, \
265
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | \
266
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_SCHEMA_ID |, \
267
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
268
+ #define _NVTX_DEFINE_SWS_5(struct_id, schema_name, prefix, schema_flags, entries) \
269
+ _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
270
+ prefix _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
271
+ prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, 0, \
272
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS |, \
273
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
274
+ #define _NVTX_DEFINE_SWS_4(struct_id, schema_name, prefix, entries) \
275
+ _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
276
+ prefix _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
277
+ prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, \
278
+ NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME |, \
279
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
280
+ #define _NVTX_DEFINE_SWS_3(struct_id, schema_name, entries) \
281
+ _NVTX_DEFINE_SWS_4(struct_id, schema_name, /* no prefix */, entries)
282
+ #define _NVTX_DEFINE_SWS_2(struct_id, entries) \
283
+ _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
284
+ _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
285
+ NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, NVTX_NULLPTR, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, , \
286
+ NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
287
+
288
+ #define _NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, ...) \
289
+ NVTX_EXT_CONCAT(_NVTX_DEFINE_SWS_, \
290
+ NVTX_EXT_NUM_ARGS(struct_id, __VA_ARGS__))(struct_id, __VA_ARGS__)
291
+
292
+ /*** END: Helper for `NVTX_DEFINE_STRUCT_WITH_SCHEMA[_AND_REGISTER]` ***/
293
+
294
+ #endif /* NVTX_EXT_PAYLOAD_HELPER_INTERNAL_H */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadTypeInfo.h ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_IMPL_PAYLOAD_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtPayload.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ typedef void* nvtx_payload_pointer_type;
36
+
37
+ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
38
+ #ifndef __APPLE__
39
+ #include <uchar.h>
40
+ #endif
41
+ #include <stdalign.h>
42
+ #endif
43
+
44
+ /* `char8_t` is available as of C++20 or C23 */
45
+ #if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || (defined(__cplusplus) && __cplusplus >= 201811L)) && !defined(__APPLE__)
46
+ #define NVTX_HAVE_CHAR8 1
47
+ #else
48
+ #define NVTX_HAVE_CHAR8 0
49
+ #endif
50
+
51
+ /* `char16_t` and `char32_t` are available as of C++11 or C11 */
52
+ #if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 200704L)) && !defined(__APPLE__)
53
+ #define NVTX_HAVE_CHAR16_CHAR32 1
54
+ #else
55
+ #define NVTX_HAVE_CHAR16_CHAR32 0
56
+ #endif
57
+
58
+ /* `alignof` is available as of C11 or C++11. */
59
+ #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || (defined(__cplusplus) && __cplusplus >= 201103L)
60
+
61
+ #define nvtx_alignof(type) alignof(type)
62
+ #define nvtx_alignof2(type,tname) alignof(type)
63
+
64
+ #else /* (__STDC_VERSION__ >= 201112L) || (__cplusplus >= 201103L) */
65
+
66
+ /* Create helper structs to determine type alignment. */
67
+ #define MKTYPEDEF(type) typedef struct {char c; type d;} _nvtx_##type
68
+ #define MKTYPEDEF2(type,tname) typedef struct {char c; type d;} _nvtx_##tname
69
+
70
+ MKTYPEDEF(char);
71
+ MKTYPEDEF2(unsigned char, uchar);
72
+ MKTYPEDEF(short);
73
+ MKTYPEDEF2(unsigned short, ushort);
74
+ MKTYPEDEF(int);
75
+ MKTYPEDEF2(unsigned int, uint);
76
+ MKTYPEDEF(long);
77
+ MKTYPEDEF2(unsigned long, ulong);
78
+ MKTYPEDEF2(long long, longlong);
79
+ MKTYPEDEF2(unsigned long long, ulonglong);
80
+
81
+ MKTYPEDEF(int8_t);
82
+ MKTYPEDEF(uint8_t);
83
+ MKTYPEDEF(int16_t);
84
+ MKTYPEDEF(uint16_t);
85
+ MKTYPEDEF(int32_t);
86
+ MKTYPEDEF(uint32_t);
87
+ MKTYPEDEF(int64_t);
88
+ MKTYPEDEF(uint64_t);
89
+
90
+ MKTYPEDEF(float);
91
+ MKTYPEDEF(double);
92
+ MKTYPEDEF2(long double, longdouble);
93
+
94
+ MKTYPEDEF(size_t);
95
+ MKTYPEDEF(nvtx_payload_pointer_type);
96
+
97
+ MKTYPEDEF(wchar_t);
98
+
99
+ #if NVTX_HAVE_CHAR8
100
+ MKTYPEDEF(char8_t);
101
+ #endif
102
+
103
+ #if NVTX_HAVE_CHAR16_CHAR32
104
+ MKTYPEDEF(char16_t);
105
+ MKTYPEDEF(char32_t);
106
+ #endif
107
+
108
+ /* C requires including stddef.h to use `offsetof`. */
109
+ #ifndef __cplusplus
110
+ #include <stddef.h>
111
+ #endif
112
+
113
+ #define nvtx_alignof(tname) offsetof(_nvtx_##tname, d)
114
+ #define nvtx_alignof2(type, tname) offsetof(_nvtx_##tname, d)
115
+
116
+ #endif /* __STDC_VERSION__ >= 201112L */
117
+
118
+ #undef MKTYPEDEF
119
+ #undef MKTYPEDEF2
120
+
121
+ /*
122
+ * Helper array to get the alignment for each predefined C/C++ language type.
123
+ * The order of entries must match the values in `enum nvtxPayloadSchemaEntryType`.
124
+ *
125
+ * In C++, `const` variables use internal linkage by default, but we need it to
126
+ * be public (extern) since weak declarations must be public.
127
+ */
128
+ NVTX_LINKONCE_DEFINE_GLOBAL
129
+ #ifdef __cplusplus
130
+ extern
131
+ #endif
132
+ const nvtxPayloadEntryTypeInfo_t
133
+ NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadTypeInfo)[NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE] =
134
+ {
135
+ /* The first entry contains this array's length and the size of each entry in this array. */
136
+ {NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE, sizeof(nvtxPayloadEntryTypeInfo_t)},
137
+
138
+ /*** C integer types ***/
139
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR */ {sizeof(char), nvtx_alignof(char)},
140
+ /* NVTX_PAYLOAD_ENTRY_TYPE_UCHAR */ {sizeof(unsigned char), nvtx_alignof2(unsigned char, uchar)},
141
+ /* NVTX_PAYLOAD_ENTRY_TYPE_SHORT */ {sizeof(short), nvtx_alignof(short)},
142
+ /* NVTX_PAYLOAD_ENTRY_TYPE_USHORT */ {sizeof(unsigned short), nvtx_alignof2(unsigned short, ushort)},
143
+ /* NVTX_PAYLOAD_ENTRY_TYPE_INT */ {sizeof(int), nvtx_alignof(int)},
144
+ /* NVTX_PAYLOAD_ENTRY_TYPE_UINT */ {sizeof(unsigned int), nvtx_alignof2(unsigned int, uint)},
145
+ /* NVTX_PAYLOAD_ENTRY_TYPE_LONG */ {sizeof(long), nvtx_alignof(long)},
146
+ /* NVTX_PAYLOAD_ENTRY_TYPE_ULONG */ {sizeof(unsigned long), nvtx_alignof2(unsigned long, ulong)},
147
+ /* NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG */ {sizeof(long long), nvtx_alignof2(long long, longlong)},
148
+ /* NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG */ {sizeof(unsigned long long), nvtx_alignof2(unsigned long long,ulonglong)},
149
+
150
+ /*** Integer types with explicit size ***/
151
+ /* NVTX_PAYLOAD_ENTRY_TYPE_INT8 */ {sizeof(int8_t), nvtx_alignof(int8_t)},
152
+ /* NVTX_PAYLOAD_ENTRY_TYPE_UINT8 */ {sizeof(uint8_t), nvtx_alignof(uint8_t)},
153
+ /* NVTX_PAYLOAD_ENTRY_TYPE_INT16 */ {sizeof(int16_t), nvtx_alignof(int16_t)},
154
+ /* NVTX_PAYLOAD_ENTRY_TYPE_UINT16 */ {sizeof(uint16_t), nvtx_alignof(uint16_t)},
155
+ /* NVTX_PAYLOAD_ENTRY_TYPE_INT32 */ {sizeof(int32_t), nvtx_alignof(int32_t)},
156
+ /* NVTX_PAYLOAD_ENTRY_TYPE_UINT32 */ {sizeof(uint32_t), nvtx_alignof(uint32_t)},
157
+ /* NVTX_PAYLOAD_ENTRY_TYPE_INT64 */ {sizeof(int64_t), nvtx_alignof(int64_t)},
158
+ /* NVTX_PAYLOAD_ENTRY_TYPE_UINT64 */ {sizeof(uint64_t), nvtx_alignof(uint64_t)},
159
+
160
+ /*** C floating point types ***/
161
+ /* NVTX_PAYLOAD_ENTRY_TYPE_FLOAT */ {sizeof(float), nvtx_alignof(float)},
162
+ /* NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE */ {sizeof(double), nvtx_alignof(double)},
163
+ /* NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE */ {sizeof(long double), nvtx_alignof2(long double, longdouble)},
164
+
165
+ /* NVTX_PAYLOAD_ENTRY_TYPE_SIZE */ {sizeof(size_t), nvtx_alignof(size_t)},
166
+ /* NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS */ {sizeof(nvtx_payload_pointer_type), nvtx_alignof(nvtx_payload_pointer_type)},
167
+
168
+ /*** Special character types ***/
169
+ /* NVTX_PAYLOAD_ENTRY_TYPE_WCHAR */ {sizeof(wchar_t), nvtx_alignof(wchar_t)},
170
+
171
+ #if NVTX_HAVE_CHAR8
172
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 */ {sizeof(char8_t), nvtx_alignof(char8_t)},
173
+ #else
174
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 */ {0, 0},
175
+ #endif
176
+
177
+ #if NVTX_HAVE_CHAR16_CHAR32
178
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 */ {sizeof(char16_t), nvtx_alignof(char16_t)},
179
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 */ {sizeof(char32_t), nvtx_alignof(char32_t)}
180
+ #else
181
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 */ {0, 0},
182
+ /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 */ {0, 0}
183
+ #endif
184
+ };
185
+
186
+ #undef nvtx_alignof
187
+ #undef nvtx_alignof2
188
+ #undef NVTX_HAVE_CHAR8
189
+ #undef NVTX_HAVE_CHAR16_CHAR32
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtTypes.h ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_EXT_TYPES_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt[EXTENSION].h.
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ /* This header defines types which are used by the internal implementation
36
+ * of NVTX and callback subscribers. API clients do not use these types,
37
+ * so they are defined here instead of in nvToolsExt.h to clarify they are
38
+ * not part of the NVTX client API. */
39
+
40
+ #ifndef NVTXEXTTYPES_H
41
+ #define NVTXEXTTYPES_H
42
+
43
+ typedef intptr_t (NVTX_API * NvtxExtGetExportFunction_t)(uint32_t exportFunctionId); /* Type of nvtxExtModuleInfo_t::getExportFunction: takes an export function ID and returns an intptr_t. */
44
+
45
+ typedef struct nvtxExtModuleSegment_t /* One segment of an extension module: a group of function slots. */
46
+ {
47
+ size_t segmentId; /* Identifier of this segment. */
48
+ size_t slotCount; /* Number of entries in `functionSlots`. */
49
+ intptr_t* functionSlots; /* Array of `slotCount` slots; extension init sets slots still marked NVTX_EXTENSION_FRESH (or all slots on init failure) to NVTX_EXTENSION_DISABLED. */
50
+ } nvtxExtModuleSegment_t;
51
+
52
+ typedef struct nvtxExtModuleInfo_t /* Description of an extension module; a pointer to it is the argument of NvtxExtInitializeInjectionFunc_t. */
53
+ {
54
+ uint16_t nvtxVer;
55
+ uint16_t structSize; /* NOTE(review): presumably sizeof(nvtxExtModuleInfo_t) as seen by the client -- confirm. */
56
+ uint16_t moduleId;
57
+ uint16_t compatId;
58
+ size_t segmentsCount; /* Number of entries in `segments`. */
59
+ nvtxExtModuleSegment_t* segments; /* Array of `segmentsCount` segments, each holding its own function slots. */
60
+ NvtxExtGetExportFunction_t getExportFunction;
61
+ const void* extInfo;
62
+ } nvtxExtModuleInfo_t;
63
+
64
+ typedef int (NVTX_API * NvtxExtInitializeInjectionFunc_t)(nvtxExtModuleInfo_t* moduleInfo); /* Initialization function taking the module info. NOTE(review): presumably the type of the injection init function called during extension initialization, where a return value of 0 is treated as failure -- confirm. */
65
+
66
+ #endif /* NVTXEXTTYPES_H */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImpl.h ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ #include <stdlib.h>
36
+ #include <stdio.h>
37
+ #include <string.h>
38
+ #include <wchar.h>
39
+
40
+ /* ---- Include required platform headers ---- */
41
+
42
+ #if defined(_WIN32)
43
+
44
+ #include <windows.h>
45
+
46
+ #else
47
+ #include <unistd.h>
48
+
49
+ #if defined(__ANDROID__)
50
+ #include <android/api-level.h>
51
+ #endif
52
+
53
+ #if defined(__linux__) || defined(__CYGWIN__)
54
+ #include <sched.h>
55
+ #endif
56
+
57
+ #include <sys/types.h>
58
+ #include <limits.h>
59
+ #include <dlfcn.h>
60
+ #include <fcntl.h>
61
+ #include <errno.h>
62
+ #include <pthread.h>
63
+
64
+ #endif
65
+
66
+ /* ---- Define macros used in this file ---- */
67
+
68
/* Values stored in the globals' initState field. */
#define NVTX_INIT_STATE_FRESH    0
#define NVTX_INIT_STATE_STARTED  1
#define NVTX_INIT_STATE_COMPLETE 2

/* Diagnostic logging.
 *
 * NVTX_ERR / NVTX_INFO take printf-style arguments.  They only produce output
 * when NVTX_DEBUG_PRINT is defined; otherwise they expand to nothing.
 *
 * Every variant expands to a single expression WITHOUT a trailing semicolon,
 * so the call site always supplies the ';'.  This keeps
 *     if (cond) NVTX_ERR("..."); else ...
 * well-formed on every platform (the Android variants previously carried
 * their own ';', which broke such if/else chains on Android only).
 *
 * The stderr variants paste a prefix onto the first argument, so the format
 * string must be a string literal. */
#ifdef NVTX_DEBUG_PRINT
#ifdef __ANDROID__
#include <android/log.h>
#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__)
#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__)
#else
#include <stdio.h>
#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
#endif
#else /* !defined(NVTX_DEBUG_PRINT) */
#define NVTX_ERR(...)
#define NVTX_INFO(...)
#endif
86
+
87
+ #ifdef __cplusplus
88
+ extern "C" {
89
+ #endif /* __cplusplus */
90
+
91
+ #ifdef __GNUC__
92
+ #pragma GCC visibility push(hidden)
93
+ #endif
94
+
95
+ /* ---- Forward declare all functions referenced in globals ---- */
96
+
97
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void);
98
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
99
+ NvtxCallbackModule callback_module,
100
+ NvtxFunctionTable* out_table,
101
+ unsigned int* out_size);
102
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(
103
+ uint32_t version);
104
+ NVTX_LINKONCE_FWDDECL_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(
105
+ uint32_t exportTableId);
106
+
107
+ #include "nvtxInitDecls.h"
108
+
109
+ /* ---- Define all globals ---- */
110
+
111
+ typedef struct nvtxGlobals_t
112
+ {
113
+ volatile unsigned int initState;
114
+ NvtxExportTableCallbacks etblCallbacks;
115
+ NvtxExportTableVersionInfo etblVersionInfo;
116
+
117
+ /* Implementation function pointers */
118
+ nvtxMarkEx_impl_fntype nvtxMarkEx_impl_fnptr;
119
+ nvtxMarkA_impl_fntype nvtxMarkA_impl_fnptr;
120
+ nvtxMarkW_impl_fntype nvtxMarkW_impl_fnptr;
121
+ nvtxRangeStartEx_impl_fntype nvtxRangeStartEx_impl_fnptr;
122
+ nvtxRangeStartA_impl_fntype nvtxRangeStartA_impl_fnptr;
123
+ nvtxRangeStartW_impl_fntype nvtxRangeStartW_impl_fnptr;
124
+ nvtxRangeEnd_impl_fntype nvtxRangeEnd_impl_fnptr;
125
+ nvtxRangePushEx_impl_fntype nvtxRangePushEx_impl_fnptr;
126
+ nvtxRangePushA_impl_fntype nvtxRangePushA_impl_fnptr;
127
+ nvtxRangePushW_impl_fntype nvtxRangePushW_impl_fnptr;
128
+ nvtxRangePop_impl_fntype nvtxRangePop_impl_fnptr;
129
+ nvtxNameCategoryA_impl_fntype nvtxNameCategoryA_impl_fnptr;
130
+ nvtxNameCategoryW_impl_fntype nvtxNameCategoryW_impl_fnptr;
131
+ nvtxNameOsThreadA_impl_fntype nvtxNameOsThreadA_impl_fnptr;
132
+ nvtxNameOsThreadW_impl_fntype nvtxNameOsThreadW_impl_fnptr;
133
+
134
+ nvtxNameCuDeviceA_fakeimpl_fntype nvtxNameCuDeviceA_impl_fnptr;
135
+ nvtxNameCuDeviceW_fakeimpl_fntype nvtxNameCuDeviceW_impl_fnptr;
136
+ nvtxNameCuContextA_fakeimpl_fntype nvtxNameCuContextA_impl_fnptr;
137
+ nvtxNameCuContextW_fakeimpl_fntype nvtxNameCuContextW_impl_fnptr;
138
+ nvtxNameCuStreamA_fakeimpl_fntype nvtxNameCuStreamA_impl_fnptr;
139
+ nvtxNameCuStreamW_fakeimpl_fntype nvtxNameCuStreamW_impl_fnptr;
140
+ nvtxNameCuEventA_fakeimpl_fntype nvtxNameCuEventA_impl_fnptr;
141
+ nvtxNameCuEventW_fakeimpl_fntype nvtxNameCuEventW_impl_fnptr;
142
+
143
+ nvtxNameClDeviceA_fakeimpl_fntype nvtxNameClDeviceA_impl_fnptr;
144
+ nvtxNameClDeviceW_fakeimpl_fntype nvtxNameClDeviceW_impl_fnptr;
145
+ nvtxNameClContextA_fakeimpl_fntype nvtxNameClContextA_impl_fnptr;
146
+ nvtxNameClContextW_fakeimpl_fntype nvtxNameClContextW_impl_fnptr;
147
+ nvtxNameClCommandQueueA_fakeimpl_fntype nvtxNameClCommandQueueA_impl_fnptr;
148
+ nvtxNameClCommandQueueW_fakeimpl_fntype nvtxNameClCommandQueueW_impl_fnptr;
149
+ nvtxNameClMemObjectA_fakeimpl_fntype nvtxNameClMemObjectA_impl_fnptr;
150
+ nvtxNameClMemObjectW_fakeimpl_fntype nvtxNameClMemObjectW_impl_fnptr;
151
+ nvtxNameClSamplerA_fakeimpl_fntype nvtxNameClSamplerA_impl_fnptr;
152
+ nvtxNameClSamplerW_fakeimpl_fntype nvtxNameClSamplerW_impl_fnptr;
153
+ nvtxNameClProgramA_fakeimpl_fntype nvtxNameClProgramA_impl_fnptr;
154
+ nvtxNameClProgramW_fakeimpl_fntype nvtxNameClProgramW_impl_fnptr;
155
+ nvtxNameClEventA_fakeimpl_fntype nvtxNameClEventA_impl_fnptr;
156
+ nvtxNameClEventW_fakeimpl_fntype nvtxNameClEventW_impl_fnptr;
157
+
158
+ nvtxNameCudaDeviceA_fakeimpl_fntype nvtxNameCudaDeviceA_impl_fnptr;
159
+ nvtxNameCudaDeviceW_fakeimpl_fntype nvtxNameCudaDeviceW_impl_fnptr;
160
+ nvtxNameCudaStreamA_fakeimpl_fntype nvtxNameCudaStreamA_impl_fnptr;
161
+ nvtxNameCudaStreamW_fakeimpl_fntype nvtxNameCudaStreamW_impl_fnptr;
162
+ nvtxNameCudaEventA_fakeimpl_fntype nvtxNameCudaEventA_impl_fnptr;
163
+ nvtxNameCudaEventW_fakeimpl_fntype nvtxNameCudaEventW_impl_fnptr;
164
+
165
+ nvtxDomainMarkEx_impl_fntype nvtxDomainMarkEx_impl_fnptr;
166
+ nvtxDomainRangeStartEx_impl_fntype nvtxDomainRangeStartEx_impl_fnptr;
167
+ nvtxDomainRangeEnd_impl_fntype nvtxDomainRangeEnd_impl_fnptr;
168
+ nvtxDomainRangePushEx_impl_fntype nvtxDomainRangePushEx_impl_fnptr;
169
+ nvtxDomainRangePop_impl_fntype nvtxDomainRangePop_impl_fnptr;
170
+ nvtxDomainResourceCreate_impl_fntype nvtxDomainResourceCreate_impl_fnptr;
171
+ nvtxDomainResourceDestroy_impl_fntype nvtxDomainResourceDestroy_impl_fnptr;
172
+ nvtxDomainNameCategoryA_impl_fntype nvtxDomainNameCategoryA_impl_fnptr;
173
+ nvtxDomainNameCategoryW_impl_fntype nvtxDomainNameCategoryW_impl_fnptr;
174
+ nvtxDomainRegisterStringA_impl_fntype nvtxDomainRegisterStringA_impl_fnptr;
175
+ nvtxDomainRegisterStringW_impl_fntype nvtxDomainRegisterStringW_impl_fnptr;
176
+ nvtxDomainCreateA_impl_fntype nvtxDomainCreateA_impl_fnptr;
177
+ nvtxDomainCreateW_impl_fntype nvtxDomainCreateW_impl_fnptr;
178
+ nvtxDomainDestroy_impl_fntype nvtxDomainDestroy_impl_fnptr;
179
+ nvtxInitialize_impl_fntype nvtxInitialize_impl_fnptr;
180
+
181
+ nvtxDomainSyncUserCreate_fakeimpl_fntype nvtxDomainSyncUserCreate_impl_fnptr;
182
+ nvtxDomainSyncUserDestroy_fakeimpl_fntype nvtxDomainSyncUserDestroy_impl_fnptr;
183
+ nvtxDomainSyncUserAcquireStart_fakeimpl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr;
184
+ nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr;
185
+ nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
186
+ nvtxDomainSyncUserReleasing_fakeimpl_fntype nvtxDomainSyncUserReleasing_impl_fnptr;
187
+
188
+ /* Tables of function pointers -- Extra null added to the end to ensure
189
+ * a crash instead of silent corruption if a tool reads off the end. */
190
+ NvtxFunctionPointer* functionTable_CORE [NVTX_CBID_CORE_SIZE + 1];
191
+ NvtxFunctionPointer* functionTable_CUDA [NVTX_CBID_CUDA_SIZE + 1];
192
+ NvtxFunctionPointer* functionTable_OPENCL[NVTX_CBID_OPENCL_SIZE + 1];
193
+ NvtxFunctionPointer* functionTable_CUDART[NVTX_CBID_CUDART_SIZE + 1];
194
+ NvtxFunctionPointer* functionTable_CORE2 [NVTX_CBID_CORE2_SIZE + 1];
195
+ NvtxFunctionPointer* functionTable_SYNC [NVTX_CBID_SYNC_SIZE + 1];
196
+ } nvtxGlobals_t;
197
+
198
+ #define NVTX_GLOBAL_TABLE_ENTRY(name) ( NVTX_REINTERPRET_CAST(NvtxFunctionPointer*, &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).name ## _impl_fnptr ) )
199
+
200
+ NVTX_LINKONCE_DEFINE_GLOBAL nvtxGlobals_t NVTX_VERSIONED_IDENTIFIER(nvtxGlobals) =
201
+ {
202
+ NVTX_INIT_STATE_FRESH,
203
+
204
+ {
205
+ sizeof(NvtxExportTableCallbacks),
206
+ NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)
207
+ },
208
+ {
209
+ sizeof(NvtxExportTableVersionInfo),
210
+ NVTX_VERSION,
211
+ 0,
212
+ NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)
213
+ },
214
+
215
+ /* Implementation function pointers */
216
+ NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init),
217
+ NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init),
218
+ NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init),
219
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init),
220
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init),
221
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init),
222
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init),
223
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init),
224
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init),
225
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init),
226
+ NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init),
227
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init),
228
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init),
229
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init),
230
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init),
231
+
232
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init),
233
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init),
234
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init),
235
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init),
236
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init),
237
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init),
238
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init),
239
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init),
240
+
241
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init),
242
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init),
243
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init),
244
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init),
245
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init),
246
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init),
247
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init),
248
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init),
249
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init),
250
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init),
251
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init),
252
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init),
253
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init),
254
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init),
255
+
256
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init),
257
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init),
258
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init),
259
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init),
260
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init),
261
+ NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init),
262
+
263
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init),
264
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init),
265
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init),
266
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init),
267
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init),
268
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init),
269
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init),
270
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init),
271
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init),
272
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init),
273
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init),
274
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init),
275
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init),
276
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init),
277
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init),
278
+
279
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init),
280
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init),
281
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init),
282
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init),
283
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init),
284
+ NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init),
285
+
286
+ /* Tables of function pointers */
287
+ {
288
+ NVTX_NULLPTR,
289
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxMarkEx),
290
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxMarkA),
291
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxMarkW),
292
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeStartEx),
293
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeStartA),
294
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeStartW),
295
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeEnd),
296
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePushEx),
297
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePushA),
298
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePushW),
299
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePop),
300
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCategoryA),
301
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCategoryW),
302
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameOsThreadA),
303
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameOsThreadW),
304
+ NVTX_NULLPTR
305
+ },
306
+ {
307
+ NVTX_NULLPTR,
308
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuDeviceA),
309
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuDeviceW),
310
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuContextA),
311
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuContextW),
312
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuStreamA),
313
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuStreamW),
314
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuEventA),
315
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuEventW),
316
+ NVTX_NULLPTR
317
+ },
318
+ {
319
+ NVTX_NULLPTR,
320
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClDeviceA),
321
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClDeviceW),
322
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClContextA),
323
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClContextW),
324
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClCommandQueueA),
325
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClCommandQueueW),
326
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClMemObjectA),
327
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClMemObjectW),
328
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClSamplerA),
329
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClSamplerW),
330
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClProgramA),
331
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClProgramW),
332
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClEventA),
333
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClEventW),
334
+ NVTX_NULLPTR
335
+ },
336
+ {
337
+ NVTX_NULLPTR,
338
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaDeviceA),
339
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaDeviceW),
340
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaStreamA),
341
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaStreamW),
342
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaEventA),
343
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaEventW),
344
+ NVTX_NULLPTR
345
+ },
346
+ {
347
+ NVTX_NULLPTR,
348
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainMarkEx),
349
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangeStartEx),
350
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangeEnd),
351
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangePushEx),
352
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangePop),
353
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainResourceCreate),
354
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainResourceDestroy),
355
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainNameCategoryA),
356
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainNameCategoryW),
357
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRegisterStringA),
358
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRegisterStringW),
359
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainCreateA),
360
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainCreateW),
361
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainDestroy),
362
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxInitialize),
363
+ NVTX_NULLPTR
364
+ },
365
+ {
366
+ NVTX_NULLPTR,
367
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserCreate),
368
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserDestroy),
369
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserAcquireStart),
370
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserAcquireFailed),
371
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserAcquireSuccess),
372
+ NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserReleasing),
373
+ NVTX_NULLPTR
374
+ }
375
+ };
376
+
377
+ #undef NVTX_GLOBAL_TABLE_ENTRY
378
+
379
+ /* ---- Define static inline implementations of core API functions ---- */
380
+
381
+ #include "nvtxImplCore.h"
382
+
383
+ /* ---- Define implementations of export table functions ---- */
384
+
385
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
386
+ NvtxCallbackModule callback_module,
387
+ NvtxFunctionTable* out_table,
388
+ unsigned int* out_size)
389
+ {
390
+ unsigned int bytes = 0;
391
+ NvtxFunctionTable table = NVTX_NULLPTR;
392
+
393
+ switch (callback_module)
394
+ {
395
+ case NVTX_CB_MODULE_CORE:
396
+ table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE;
397
+ bytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE));
398
+ break;
399
+ case NVTX_CB_MODULE_CUDA:
400
+ table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA;
401
+ bytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA));
402
+ break;
403
+ case NVTX_CB_MODULE_OPENCL:
404
+ table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL;
405
+ bytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL));
406
+ break;
407
+ case NVTX_CB_MODULE_CUDART:
408
+ table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART;
409
+ bytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART));
410
+ break;
411
+ case NVTX_CB_MODULE_CORE2:
412
+ table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2;
413
+ bytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2));
414
+ break;
415
+ case NVTX_CB_MODULE_SYNC:
416
+ table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC;
417
+ bytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC));
418
+ break;
419
+ case NVTX_CB_MODULE_INVALID:
420
+ case NVTX_CB_MODULE_SIZE:
421
+ case NVTX_CB_MODULE_FORCE_INT:
422
+ default: return 0;
423
+ }
424
+
425
+ if (out_size)
426
+ *out_size = (bytes / NVTX_STATIC_CAST(unsigned int, sizeof(NvtxFunctionPointer*))) - 1;
427
+
428
+ if (out_table)
429
+ *out_table = table;
430
+
431
+ return 1;
432
+ }
433
+
434
+ NVTX_LINKONCE_DEFINE_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(uint32_t exportTableId)
435
+ {
436
+ switch (exportTableId)
437
+ {
438
+ case NVTX_ETID_CALLBACKS: return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblCallbacks;
439
+ case NVTX_ETID_VERSIONINFO: return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblVersionInfo;
440
+ default: return NVTX_NULLPTR;
441
+ }
442
+ }
443
+
444
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(uint32_t version)
445
+ {
446
+ /* Reserved for custom implementations to resolve problems with tools */
447
+ (void)version;
448
+ }
449
+
450
+ /* ---- Define implementations of init versions of all API functions ---- */
451
+
452
+ #include "nvtxInitDefs.h"
453
+
454
+ /* ---- Define implementations of initialization functions ---- */
455
+
456
+ #include "nvtxInit.h"
457
+
458
+ #ifdef __GNUC__
459
+ #pragma GCC visibility pop
460
+ #endif
461
+
462
+ #ifdef __cplusplus
463
+ } /* extern "C" */
464
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCore.h ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib)
32
+ {
33
+ NVTX_SET_NAME_MANGLING_OPTIONS
34
+ #ifdef NVTX_DISABLE
35
+ (void)eventAttrib;
36
+ #else /* NVTX_DISABLE */
37
+ nvtxMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr;
38
+ if (local != NVTX_NULLPTR)
39
+ (*local)(eventAttrib);
40
+ #endif /* NVTX_DISABLE */
41
+ }
42
+
43
+ NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message)
44
+ {
45
+ NVTX_SET_NAME_MANGLING_OPTIONS
46
+ #ifdef NVTX_DISABLE
47
+ (void)message;
48
+ #else /* NVTX_DISABLE */
49
+ nvtxMarkA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr;
50
+ if (local != NVTX_NULLPTR)
51
+ (*local)(message);
52
+ #endif /* NVTX_DISABLE */
53
+ }
54
+
55
+ NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message)
56
+ {
57
+ NVTX_SET_NAME_MANGLING_OPTIONS
58
+ #ifdef NVTX_DISABLE
59
+ (void)message;
60
+ #else /* NVTX_DISABLE */
61
+ nvtxMarkW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr;
62
+ if (local != NVTX_NULLPTR)
63
+ (*local)(message);
64
+ #endif /* NVTX_DISABLE */
65
+ }
66
+
67
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib)
68
+ {
69
+ NVTX_SET_NAME_MANGLING_OPTIONS
70
+ #ifdef NVTX_DISABLE
71
+ (void)eventAttrib;
72
+ #else /* NVTX_DISABLE */
73
+ nvtxRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr;
74
+ if (local != NVTX_NULLPTR)
75
+ return (*local)(eventAttrib);
76
+ else
77
+ #endif /* NVTX_DISABLE */
78
+ return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
79
+ }
80
+
81
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message)
82
+ {
83
+ NVTX_SET_NAME_MANGLING_OPTIONS
84
+ #ifdef NVTX_DISABLE
85
+ (void)message;
86
+ #else /* NVTX_DISABLE */
87
+ nvtxRangeStartA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr;
88
+ if (local != NVTX_NULLPTR)
89
+ return (*local)(message);
90
+ else
91
+ #endif /* NVTX_DISABLE */
92
+ return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
93
+ }
94
+
95
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message)
96
+ {
97
+ NVTX_SET_NAME_MANGLING_OPTIONS
98
+ #ifdef NVTX_DISABLE
99
+ (void)message;
100
+ #else /* NVTX_DISABLE */
101
+ nvtxRangeStartW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr;
102
+ if (local != NVTX_NULLPTR)
103
+ return (*local)(message);
104
+ else
105
+ #endif /* NVTX_DISABLE */
106
+ return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
107
+ }
108
+
109
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id)
110
+ {
111
+ NVTX_SET_NAME_MANGLING_OPTIONS
112
+ #ifdef NVTX_DISABLE
113
+ (void)id;
114
+ #else /* NVTX_DISABLE */
115
+ nvtxRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr;
116
+ if (local != NVTX_NULLPTR)
117
+ (*local)(id);
118
+ #endif /* NVTX_DISABLE */
119
+ }
120
+
121
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib)
122
+ {
123
+ NVTX_SET_NAME_MANGLING_OPTIONS
124
+ #ifdef NVTX_DISABLE
125
+ (void)eventAttrib;
126
+ #else /* NVTX_DISABLE */
127
+ nvtxRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr;
128
+ if (local != NVTX_NULLPTR)
129
+ return (*local)(eventAttrib);
130
+ else
131
+ #endif /* NVTX_DISABLE */
132
+ return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
133
+ }
134
+
135
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message)
136
+ {
137
+ NVTX_SET_NAME_MANGLING_OPTIONS
138
+ #ifdef NVTX_DISABLE
139
+ (void)message;
140
+ #else /* NVTX_DISABLE */
141
+ nvtxRangePushA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr;
142
+ if (local != NVTX_NULLPTR)
143
+ return (*local)(message);
144
+ else
145
+ #endif /* NVTX_DISABLE */
146
+ return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
147
+ }
148
+
149
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message)
150
+ {
151
+ NVTX_SET_NAME_MANGLING_OPTIONS
152
+ #ifdef NVTX_DISABLE
153
+ (void)message;
154
+ #else /* NVTX_DISABLE */
155
+ nvtxRangePushW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr;
156
+ if (local != NVTX_NULLPTR)
157
+ return (*local)(message);
158
+ else
159
+ #endif /* NVTX_DISABLE */
160
+ return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
161
+ }
162
+
163
+ NVTX_DECLSPEC int NVTX_API nvtxRangePop(void)
164
+ {
165
+ NVTX_SET_NAME_MANGLING_OPTIONS
166
+ #ifndef NVTX_DISABLE
167
+ nvtxRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr;
168
+ if (local != NVTX_NULLPTR)
169
+ return (*local)();
170
+ else
171
+ #endif /* NVTX_DISABLE */
172
+ return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
173
+ }
174
+
175
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name)
176
+ {
177
+ NVTX_SET_NAME_MANGLING_OPTIONS
178
+ #ifdef NVTX_DISABLE
179
+ (void)category;
180
+ (void)name;
181
+ #else /* NVTX_DISABLE */
182
+ nvtxNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr;
183
+ if (local != NVTX_NULLPTR)
184
+ (*local)(category, name);
185
+ #endif /* NVTX_DISABLE */
186
+ }
187
+
188
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name)
189
+ {
190
+ NVTX_SET_NAME_MANGLING_OPTIONS
191
+ #ifdef NVTX_DISABLE
192
+ (void)category;
193
+ (void)name;
194
+ #else /* NVTX_DISABLE */
195
+ nvtxNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr;
196
+ if (local != NVTX_NULLPTR)
197
+ (*local)(category, name);
198
+ #endif /* NVTX_DISABLE */
199
+ }
200
+
201
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name)
202
+ {
203
+ NVTX_SET_NAME_MANGLING_OPTIONS
204
+ #ifdef NVTX_DISABLE
205
+ (void)threadId;
206
+ (void)name;
207
+ #else /* NVTX_DISABLE */
208
+ nvtxNameOsThreadA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr;
209
+ if (local != NVTX_NULLPTR)
210
+ (*local)(threadId, name);
211
+ #endif /* NVTX_DISABLE */
212
+ }
213
+
214
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name)
215
+ {
216
+ NVTX_SET_NAME_MANGLING_OPTIONS
217
+ #ifdef NVTX_DISABLE
218
+ (void)threadId;
219
+ (void)name;
220
+ #else /* NVTX_DISABLE */
221
+ nvtxNameOsThreadW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr;
222
+ if (local != NVTX_NULLPTR)
223
+ (*local)(threadId, name);
224
+ #endif /* NVTX_DISABLE */
225
+ }
226
+
227
+ NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
228
+ {
229
+ NVTX_SET_NAME_MANGLING_OPTIONS
230
+ #ifdef NVTX_DISABLE
231
+ (void)domain;
232
+ (void)eventAttrib;
233
+ #else /* NVTX_DISABLE */
234
+ nvtxDomainMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr;
235
+ if (local != NVTX_NULLPTR)
236
+ (*local)(domain, eventAttrib);
237
+ #endif /* NVTX_DISABLE */
238
+ }
239
+
240
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
241
+ {
242
+ NVTX_SET_NAME_MANGLING_OPTIONS
243
+ #ifdef NVTX_DISABLE
244
+ (void)domain;
245
+ (void)eventAttrib;
246
+ #else /* NVTX_DISABLE */
247
+ nvtxDomainRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr;
248
+ if (local != NVTX_NULLPTR)
249
+ return (*local)(domain, eventAttrib);
250
+ else
251
+ #endif /* NVTX_DISABLE */
252
+ return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
253
+ }
254
+
255
+ NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id)
256
+ {
257
+ NVTX_SET_NAME_MANGLING_OPTIONS
258
+ #ifdef NVTX_DISABLE
259
+ (void)domain;
260
+ (void)id;
261
+ #else /* NVTX_DISABLE */
262
+ nvtxDomainRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr;
263
+ if (local != NVTX_NULLPTR)
264
+ (*local)(domain, id);
265
+ #endif /* NVTX_DISABLE */
266
+ }
267
+
268
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
269
+ {
270
+ NVTX_SET_NAME_MANGLING_OPTIONS
271
+ #ifdef NVTX_DISABLE
272
+ (void)domain;
273
+ (void)eventAttrib;
274
+ #else /* NVTX_DISABLE */
275
+ nvtxDomainRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr;
276
+ if (local != NVTX_NULLPTR)
277
+ return (*local)(domain, eventAttrib);
278
+ else
279
+ #endif /* NVTX_DISABLE */
280
+ return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
281
+ }
282
+
283
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain)
284
+ {
285
+ NVTX_SET_NAME_MANGLING_OPTIONS
286
+ #ifdef NVTX_DISABLE
287
+ (void)domain;
288
+ #else /* NVTX_DISABLE */
289
+ nvtxDomainRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr;
290
+ if (local != NVTX_NULLPTR)
291
+ return (*local)(domain);
292
+ else
293
+ #endif /* NVTX_DISABLE */
294
+ return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
295
+ }
296
+
297
+ NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
298
+ {
299
+ NVTX_SET_NAME_MANGLING_OPTIONS
300
+ #ifdef NVTX_DISABLE
301
+ (void)domain;
302
+ (void)attribs;
303
+ #else /* NVTX_DISABLE */
304
+ nvtxDomainResourceCreate_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr;
305
+ if (local != NVTX_NULLPTR)
306
+ return (*local)(domain, attribs);
307
+ else
308
+ #endif /* NVTX_DISABLE */
309
+ return NVTX_NULLPTR;
310
+ }
311
+
312
+ NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource)
313
+ {
314
+ NVTX_SET_NAME_MANGLING_OPTIONS
315
+ #ifdef NVTX_DISABLE
316
+ (void)resource;
317
+ #else /* NVTX_DISABLE */
318
+ nvtxDomainResourceDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr;
319
+ if (local != NVTX_NULLPTR)
320
+ (*local)(resource);
321
+ #endif /* NVTX_DISABLE */
322
+ }
323
+
324
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name)
325
+ {
326
+ NVTX_SET_NAME_MANGLING_OPTIONS
327
+ #ifdef NVTX_DISABLE
328
+ (void)domain;
329
+ (void)category;
330
+ (void)name;
331
+ #else /* NVTX_DISABLE */
332
+ nvtxDomainNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr;
333
+ if (local != NVTX_NULLPTR)
334
+ (*local)(domain, category, name);
335
+ #endif /* NVTX_DISABLE */
336
+ }
337
+
338
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
339
+ {
340
+ NVTX_SET_NAME_MANGLING_OPTIONS
341
+ #ifdef NVTX_DISABLE
342
+ (void)domain;
343
+ (void)category;
344
+ (void)name;
345
+ #else /* NVTX_DISABLE */
346
+ nvtxDomainNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr;
347
+ if (local != NVTX_NULLPTR)
348
+ (*local)(domain, category, name);
349
+ #endif /* NVTX_DISABLE */
350
+ }
351
+
352
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string)
353
+ {
354
+ NVTX_SET_NAME_MANGLING_OPTIONS
355
+ #ifdef NVTX_DISABLE
356
+ (void)domain;
357
+ (void)string;
358
+ #else /* NVTX_DISABLE */
359
+ nvtxDomainRegisterStringA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr;
360
+ if (local != NVTX_NULLPTR)
361
+ return (*local)(domain, string);
362
+ else
363
+ #endif /* NVTX_DISABLE */
364
+ return NVTX_NULLPTR;
365
+ }
366
+
367
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string)
368
+ {
369
+ NVTX_SET_NAME_MANGLING_OPTIONS
370
+ #ifdef NVTX_DISABLE
371
+ (void)domain;
372
+ (void)string;
373
+ #else /* NVTX_DISABLE */
374
+ nvtxDomainRegisterStringW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr;
375
+ if (local != NVTX_NULLPTR)
376
+ return (*local)(domain, string);
377
+ else
378
+ #endif /* NVTX_DISABLE */
379
+ return NVTX_NULLPTR;
380
+ }
381
+
382
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message)
383
+ {
384
+ NVTX_SET_NAME_MANGLING_OPTIONS
385
+ #ifdef NVTX_DISABLE
386
+ (void)message;
387
+ #else /* NVTX_DISABLE */
388
+ nvtxDomainCreateA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr;
389
+ if (local != NVTX_NULLPTR)
390
+ return (*local)(message);
391
+ else
392
+ #endif /* NVTX_DISABLE */
393
+ return NVTX_NULLPTR;
394
+ }
395
+
396
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message)
397
+ {
398
+ NVTX_SET_NAME_MANGLING_OPTIONS
399
+ #ifdef NVTX_DISABLE
400
+ (void)message;
401
+ #else /* NVTX_DISABLE */
402
+ nvtxDomainCreateW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr;
403
+ if (local != NVTX_NULLPTR)
404
+ return (*local)(message);
405
+ else
406
+ #endif /* NVTX_DISABLE */
407
+ return NVTX_NULLPTR;
408
+ }
409
+
410
+ NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain)
411
+ {
412
+ NVTX_SET_NAME_MANGLING_OPTIONS
413
+ #ifdef NVTX_DISABLE
414
+ (void)domain;
415
+ #else /* NVTX_DISABLE */
416
+ nvtxDomainDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr;
417
+ if (local != NVTX_NULLPTR)
418
+ (*local)(domain);
419
+ #endif /* NVTX_DISABLE */
420
+ }
421
+
422
+ NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved)
423
+ {
424
+ NVTX_SET_NAME_MANGLING_OPTIONS
425
+ #ifdef NVTX_DISABLE
426
+ (void)reserved;
427
+ #else /* NVTX_DISABLE */
428
+ nvtxInitialize_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr;
429
+ if (local != NVTX_NULLPTR)
430
+ (*local)(reserved);
431
+ #endif /* NVTX_DISABLE */
432
+ }
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD_CUDART
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtCudaRt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif /* __cplusplus */
39
+
40
+ typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
41
+ typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
42
+ typedef void (NVTX_API * nvtxNameCudaStreamA_impl_fntype)(cudaStream_t stream, const char* name);
43
+ typedef void (NVTX_API * nvtxNameCudaStreamW_impl_fntype)(cudaStream_t stream, const wchar_t* name);
44
+ typedef void (NVTX_API * nvtxNameCudaEventA_impl_fntype)(cudaEvent_t event, const char* name);
45
+ typedef void (NVTX_API * nvtxNameCudaEventW_impl_fntype)(cudaEvent_t event, const wchar_t* name);
46
+
47
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name)
48
+ {
49
+ NVTX_SET_NAME_MANGLING_OPTIONS
50
+ #ifdef NVTX_DISABLE
51
+ (void)device;
52
+ (void)name;
53
+ #else /* NVTX_DISABLE */
54
+ nvtxNameCudaDeviceA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
55
+ if (local != NVTX_NULLPTR)
56
+ (*local)(device, name);
57
+ #endif /* NVTX_DISABLE */
58
+ }
59
+
60
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name)
61
+ {
62
+ NVTX_SET_NAME_MANGLING_OPTIONS
63
+ #ifdef NVTX_DISABLE
64
+ (void)device;
65
+ (void)name;
66
+ #else /* NVTX_DISABLE */
67
+ nvtxNameCudaDeviceW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
68
+ if (local != NVTX_NULLPTR)
69
+ (*local)(device, name);
70
+ #endif /* NVTX_DISABLE */
71
+ }
72
+
73
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name)
74
+ {
75
+ NVTX_SET_NAME_MANGLING_OPTIONS
76
+ #ifdef NVTX_DISABLE
77
+ (void)stream;
78
+ (void)name;
79
+ #else /* NVTX_DISABLE */
80
+ nvtxNameCudaStreamA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCudaStreamA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr);
81
+ if (local != NVTX_NULLPTR)
82
+ (*local)(stream, name);
83
+ #endif /* NVTX_DISABLE */
84
+ }
85
+
86
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name)
87
+ {
88
+ NVTX_SET_NAME_MANGLING_OPTIONS
89
+ #ifdef NVTX_DISABLE
90
+ (void)stream;
91
+ (void)name;
92
+ #else /* NVTX_DISABLE */
93
+ nvtxNameCudaStreamW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCudaStreamW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr);
94
+ if (local != NVTX_NULLPTR)
95
+ (*local)(stream, name);
96
+ #endif /* NVTX_DISABLE */
97
+ }
98
+
99
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name)
100
+ {
101
+ NVTX_SET_NAME_MANGLING_OPTIONS
102
+ #ifdef NVTX_DISABLE
103
+ (void)event;
104
+ (void)name;
105
+ #else /* NVTX_DISABLE */
106
+ nvtxNameCudaEventA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCudaEventA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr);
107
+ if (local != NVTX_NULLPTR)
108
+ (*local)(event, name);
109
+ #endif /* NVTX_DISABLE */
110
+ }
111
+
112
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name)
113
+ {
114
+ NVTX_SET_NAME_MANGLING_OPTIONS
115
+ #ifdef NVTX_DISABLE
116
+ (void)event;
117
+ (void)name;
118
+ #else /* NVTX_DISABLE */
119
+ nvtxNameCudaEventW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCudaEventW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr);
120
+ if (local != NVTX_NULLPTR)
121
+ (*local)(event, name);
122
+ #endif /* NVTX_DISABLE */
123
+ }
124
+
125
+ #ifdef __cplusplus
126
+ } /* extern "C" */
127
+ #endif /* __cplusplus */
128
+
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD_CUDA
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif /* __cplusplus */
39
+
40
+ typedef void (NVTX_API * nvtxNameCuDeviceA_impl_fntype)(CUdevice device, const char* name);
41
+ typedef void (NVTX_API * nvtxNameCuDeviceW_impl_fntype)(CUdevice device, const wchar_t* name);
42
+ typedef void (NVTX_API * nvtxNameCuContextA_impl_fntype)(CUcontext context, const char* name);
43
+ typedef void (NVTX_API * nvtxNameCuContextW_impl_fntype)(CUcontext context, const wchar_t* name);
44
+ typedef void (NVTX_API * nvtxNameCuStreamA_impl_fntype)(CUstream stream, const char* name);
45
+ typedef void (NVTX_API * nvtxNameCuStreamW_impl_fntype)(CUstream stream, const wchar_t* name);
46
+ typedef void (NVTX_API * nvtxNameCuEventA_impl_fntype)(CUevent event, const char* name);
47
+ typedef void (NVTX_API * nvtxNameCuEventW_impl_fntype)(CUevent event, const wchar_t* name);
48
+
49
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name)
50
+ {
51
+ NVTX_SET_NAME_MANGLING_OPTIONS
52
+ #ifdef NVTX_DISABLE
53
+ (void)device;
54
+ (void)name;
55
+ #else /* NVTX_DISABLE */
56
+ nvtxNameCuDeviceA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
57
+ if (local != NVTX_NULLPTR)
58
+ (*local)(device, name);
59
+ #endif /* NVTX_DISABLE */
60
+ }
61
+
62
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name)
63
+ {
64
+ NVTX_SET_NAME_MANGLING_OPTIONS
65
+ #ifdef NVTX_DISABLE
66
+ (void)device;
67
+ (void)name;
68
+ #else /* NVTX_DISABLE */
69
+ nvtxNameCuDeviceW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
70
+ if (local != NVTX_NULLPTR)
71
+ (*local)(device, name);
72
+ #endif /* NVTX_DISABLE */
73
+ }
74
+
75
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name)
76
+ {
77
+ NVTX_SET_NAME_MANGLING_OPTIONS
78
+ #ifdef NVTX_DISABLE
79
+ (void)context;
80
+ (void)name;
81
+ #else /* NVTX_DISABLE */
82
+ nvtxNameCuContextA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCuContextA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr);
83
+ if (local != NVTX_NULLPTR)
84
+ (*local)(context, name);
85
+ #endif /* NVTX_DISABLE */
86
+ }
87
+
88
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name)
89
+ {
90
+ NVTX_SET_NAME_MANGLING_OPTIONS
91
+ #ifdef NVTX_DISABLE
92
+ (void)context;
93
+ (void)name;
94
+ #else /* NVTX_DISABLE */
95
+ nvtxNameCuContextW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCuContextW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr);
96
+ if (local != NVTX_NULLPTR)
97
+ (*local)(context, name);
98
+ #endif /* NVTX_DISABLE */
99
+ }
100
+
101
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name)
102
+ {
103
+ NVTX_SET_NAME_MANGLING_OPTIONS
104
+ #ifdef NVTX_DISABLE
105
+ (void)stream;
106
+ (void)name;
107
+ #else /* NVTX_DISABLE */
108
+ nvtxNameCuStreamA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCuStreamA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr);
109
+ if (local != NVTX_NULLPTR)
110
+ (*local)(stream, name);
111
+ #endif /* NVTX_DISABLE */
112
+ }
113
+
114
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name)
115
+ {
116
+ NVTX_SET_NAME_MANGLING_OPTIONS
117
+ #ifdef NVTX_DISABLE
118
+ (void)stream;
119
+ (void)name;
120
+ #else /* NVTX_DISABLE */
121
+ nvtxNameCuStreamW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCuStreamW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr);
122
+ if (local != NVTX_NULLPTR)
123
+ (*local)(stream, name);
124
+ #endif /* NVTX_DISABLE */
125
+ }
126
+
127
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name)
128
+ {
129
+ NVTX_SET_NAME_MANGLING_OPTIONS
130
+ #ifdef NVTX_DISABLE
131
+ (void)event;
132
+ (void)name;
133
+ #else /* NVTX_DISABLE */
134
+ nvtxNameCuEventA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCuEventA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr);
135
+ if (local != NVTX_NULLPTR)
136
+ (*local)(event, name);
137
+ #endif /* NVTX_DISABLE */
138
+ }
139
+
140
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name)
141
+ {
142
+ NVTX_SET_NAME_MANGLING_OPTIONS
143
+ #ifdef NVTX_DISABLE
144
+ (void)event;
145
+ (void)name;
146
+ #else /* NVTX_DISABLE */
147
+ nvtxNameCuEventW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameCuEventW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr);
148
+ if (local != NVTX_NULLPTR)
149
+ (*local)(event, name);
150
+ #endif /* NVTX_DISABLE */
151
+ }
152
+
153
+ #ifdef __cplusplus
154
+ } /* extern "C" */
155
+ #endif /* __cplusplus */
156
+
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD_OPENCL
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtOpenCL.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif /* __cplusplus */
39
+
40
+ typedef void (NVTX_API * nvtxNameClDeviceA_impl_fntype)(cl_device_id device, const char* name);
41
+ typedef void (NVTX_API * nvtxNameClDeviceW_impl_fntype)(cl_device_id device, const wchar_t* name);
42
+ typedef void (NVTX_API * nvtxNameClContextA_impl_fntype)(cl_context context, const char* name);
43
+ typedef void (NVTX_API * nvtxNameClContextW_impl_fntype)(cl_context context, const wchar_t* name);
44
+ typedef void (NVTX_API * nvtxNameClCommandQueueA_impl_fntype)(cl_command_queue command_queue, const char* name);
45
+ typedef void (NVTX_API * nvtxNameClCommandQueueW_impl_fntype)(cl_command_queue command_queue, const wchar_t* name);
46
+ typedef void (NVTX_API * nvtxNameClMemObjectA_impl_fntype)(cl_mem memobj, const char* name);
47
+ typedef void (NVTX_API * nvtxNameClMemObjectW_impl_fntype)(cl_mem memobj, const wchar_t* name);
48
+ typedef void (NVTX_API * nvtxNameClSamplerA_impl_fntype)(cl_sampler sampler, const char* name);
49
+ typedef void (NVTX_API * nvtxNameClSamplerW_impl_fntype)(cl_sampler sampler, const wchar_t* name);
50
+ typedef void (NVTX_API * nvtxNameClProgramA_impl_fntype)(cl_program program, const char* name);
51
+ typedef void (NVTX_API * nvtxNameClProgramW_impl_fntype)(cl_program program, const wchar_t* name);
52
+ typedef void (NVTX_API * nvtxNameClEventA_impl_fntype)(cl_event evnt, const char* name);
53
+ typedef void (NVTX_API * nvtxNameClEventW_impl_fntype)(cl_event evnt, const wchar_t* name);
54
+
55
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name)
56
+ {
57
+ NVTX_SET_NAME_MANGLING_OPTIONS
58
+ #ifdef NVTX_DISABLE
59
+ (void)device;
60
+ (void)name;
61
+ #else /* NVTX_DISABLE */
62
+ nvtxNameClDeviceA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClDeviceA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr);
63
+ if (local != NVTX_NULLPTR)
64
+ (*local)(device, name);
65
+ #endif /* NVTX_DISABLE */
66
+ }
67
+
68
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name)
69
+ {
70
+ NVTX_SET_NAME_MANGLING_OPTIONS
71
+ #ifdef NVTX_DISABLE
72
+ (void)device;
73
+ (void)name;
74
+ #else /* NVTX_DISABLE */
75
+ nvtxNameClDeviceW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClDeviceW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr);
76
+ if (local != NVTX_NULLPTR)
77
+ (*local)(device, name);
78
+ #endif /* NVTX_DISABLE */
79
+ }
80
+
81
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name)
82
+ {
83
+ NVTX_SET_NAME_MANGLING_OPTIONS
84
+ #ifdef NVTX_DISABLE
85
+ (void)context;
86
+ (void)name;
87
+ #else /* NVTX_DISABLE */
88
+ nvtxNameClContextA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClContextA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr);
89
+ if (local != NVTX_NULLPTR)
90
+ (*local)(context, name);
91
+ #endif /* NVTX_DISABLE */
92
+ }
93
+
94
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name)
95
+ {
96
+ NVTX_SET_NAME_MANGLING_OPTIONS
97
+ #ifdef NVTX_DISABLE
98
+ (void)context;
99
+ (void)name;
100
+ #else /* NVTX_DISABLE */
101
+ nvtxNameClContextW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClContextW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr);
102
+ if (local != NVTX_NULLPTR)
103
+ (*local)(context, name);
104
+ #endif /* NVTX_DISABLE */
105
+ }
106
+
107
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name)
108
+ {
109
+ NVTX_SET_NAME_MANGLING_OPTIONS
110
+ #ifdef NVTX_DISABLE
111
+ (void)command_queue;
112
+ (void)name;
113
+ #else /* NVTX_DISABLE */
114
+ nvtxNameClCommandQueueA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClCommandQueueA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr);
115
+ if (local != NVTX_NULLPTR)
116
+ (*local)(command_queue, name);
117
+ #endif /* NVTX_DISABLE */
118
+ }
119
+
120
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name)
121
+ {
122
+ NVTX_SET_NAME_MANGLING_OPTIONS
123
+ #ifdef NVTX_DISABLE
124
+ (void)command_queue;
125
+ (void)name;
126
+ #else /* NVTX_DISABLE */
127
+ nvtxNameClCommandQueueW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClCommandQueueW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr);
128
+ if (local != NVTX_NULLPTR)
129
+ (*local)(command_queue, name);
130
+ #endif /* NVTX_DISABLE */
131
+ }
132
+
133
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name)
134
+ {
135
+ NVTX_SET_NAME_MANGLING_OPTIONS
136
+ #ifdef NVTX_DISABLE
137
+ (void)memobj;
138
+ (void)name;
139
+ #else /* NVTX_DISABLE */
140
+ nvtxNameClMemObjectA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClMemObjectA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr);
141
+ if (local != NVTX_NULLPTR)
142
+ (*local)(memobj, name);
143
+ #endif /* NVTX_DISABLE */
144
+ }
145
+
146
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name)
147
+ {
148
+ NVTX_SET_NAME_MANGLING_OPTIONS
149
+ #ifdef NVTX_DISABLE
150
+ (void)memobj;
151
+ (void)name;
152
+ #else /* NVTX_DISABLE */
153
+ nvtxNameClMemObjectW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClMemObjectW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr);
154
+ if (local != NVTX_NULLPTR)
155
+ (*local)(memobj, name);
156
+ #endif /* NVTX_DISABLE */
157
+ }
158
+
159
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name)
160
+ {
161
+ NVTX_SET_NAME_MANGLING_OPTIONS
162
+ #ifdef NVTX_DISABLE
163
+ (void)sampler;
164
+ (void)name;
165
+ #else /* NVTX_DISABLE */
166
+ nvtxNameClSamplerA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClSamplerA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr);
167
+ if (local != NVTX_NULLPTR)
168
+ (*local)(sampler, name);
169
+ #endif /* NVTX_DISABLE */
170
+ }
171
+
172
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name)
173
+ {
174
+ NVTX_SET_NAME_MANGLING_OPTIONS
175
+ #ifdef NVTX_DISABLE
176
+ (void)sampler;
177
+ (void)name;
178
+ #else /* NVTX_DISABLE */
179
+ nvtxNameClSamplerW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClSamplerW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr);
180
+ if (local != NVTX_NULLPTR)
181
+ (*local)(sampler, name);
182
+ #endif /* NVTX_DISABLE */
183
+ }
184
+
185
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name)
186
+ {
187
+ NVTX_SET_NAME_MANGLING_OPTIONS
188
+ #ifdef NVTX_DISABLE
189
+ (void)program;
190
+ (void)name;
191
+ #else /* NVTX_DISABLE */
192
+ nvtxNameClProgramA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClProgramA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr);
193
+ if (local != NVTX_NULLPTR)
194
+ (*local)(program, name);
195
+ #endif /* NVTX_DISABLE */
196
+ }
197
+
198
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name)
199
+ {
200
+ NVTX_SET_NAME_MANGLING_OPTIONS
201
+ #ifdef NVTX_DISABLE
202
+ (void)program;
203
+ (void)name;
204
+ #else /* NVTX_DISABLE */
205
+ nvtxNameClProgramW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClProgramW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr);
206
+ if (local != NVTX_NULLPTR)
207
+ (*local)(program, name);
208
+ #endif /* NVTX_DISABLE */
209
+ }
210
+
211
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name)
212
+ {
213
+ NVTX_SET_NAME_MANGLING_OPTIONS
214
+ #ifdef NVTX_DISABLE
215
+ (void)evnt;
216
+ (void)name;
217
+ #else /* NVTX_DISABLE */
218
+ nvtxNameClEventA_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClEventA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr);
219
+ if (local != NVTX_NULLPTR)
220
+ (*local)(evnt, name);
221
+ #endif /* NVTX_DISABLE */
222
+ }
223
+
224
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name)
225
+ {
226
+ NVTX_SET_NAME_MANGLING_OPTIONS
227
+ #ifdef NVTX_DISABLE
228
+ (void)evnt;
229
+ (void)name;
230
+ #else /* NVTX_DISABLE */
231
+ nvtxNameClEventW_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxNameClEventW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr);
232
+ if (local != NVTX_NULLPTR)
233
+ (*local)(evnt, name);
234
+ #endif /* NVTX_DISABLE */
235
+ }
236
+
237
+ #ifdef __cplusplus
238
+ } /* extern "C" */
239
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD_SYNC
22
+ #error Never include this file directly -- it is automatically included by nvToolsExtSync.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif /* __cplusplus */
39
+
40
+ typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
41
+ typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
42
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
43
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
44
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
45
+ typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
46
+
47
+ NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
48
+ {
49
+ NVTX_SET_NAME_MANGLING_OPTIONS
50
+ #ifdef NVTX_DISABLE
51
+ (void)domain;
52
+ (void)attribs;
53
+ #else /* NVTX_DISABLE */
54
+ nvtxDomainSyncUserCreate_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserCreate_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr);
55
+ if (local != NVTX_NULLPTR)
56
+ return (*local)(domain, attribs);
57
+ else
58
+ #endif /* NVTX_DISABLE */
59
+ return NVTX_NULLPTR;
60
+ }
61
+
62
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle)
63
+ {
64
+ NVTX_SET_NAME_MANGLING_OPTIONS
65
+ #ifdef NVTX_DISABLE
66
+ (void)handle;
67
+ #else /* NVTX_DISABLE */
68
+ nvtxDomainSyncUserDestroy_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserDestroy_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr);
69
+ if (local != NVTX_NULLPTR)
70
+ (*local)(handle);
71
+ #endif /* NVTX_DISABLE */
72
+ }
73
+
74
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle)
75
+ {
76
+ NVTX_SET_NAME_MANGLING_OPTIONS
77
+ #ifdef NVTX_DISABLE
78
+ (void)handle;
79
+ #else /* NVTX_DISABLE */
80
+ nvtxDomainSyncUserAcquireStart_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserAcquireStart_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr);
81
+ if (local != NVTX_NULLPTR)
82
+ (*local)(handle);
83
+ #endif /* NVTX_DISABLE */
84
+ }
85
+
86
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle)
87
+ {
88
+ NVTX_SET_NAME_MANGLING_OPTIONS
89
+ #ifdef NVTX_DISABLE
90
+ (void)handle;
91
+ #else /* NVTX_DISABLE */
92
+ nvtxDomainSyncUserAcquireFailed_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserAcquireFailed_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr);
93
+ if (local != NVTX_NULLPTR)
94
+ (*local)(handle);
95
+ #endif /* NVTX_DISABLE */
96
+ }
97
+
98
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle)
99
+ {
100
+ NVTX_SET_NAME_MANGLING_OPTIONS
101
+ #ifdef NVTX_DISABLE
102
+ (void)handle;
103
+ #else /* NVTX_DISABLE */
104
+ nvtxDomainSyncUserAcquireSuccess_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserAcquireSuccess_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr);
105
+ if (local != NVTX_NULLPTR)
106
+ (*local)(handle);
107
+ #endif /* NVTX_DISABLE */
108
+ }
109
+
110
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle)
111
+ {
112
+ NVTX_SET_NAME_MANGLING_OPTIONS
113
+ #ifdef NVTX_DISABLE
114
+ (void)handle;
115
+ #else /* NVTX_DISABLE */
116
+ nvtxDomainSyncUserReleasing_impl_fntype local = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserReleasing_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr);
117
+ if (local != NVTX_NULLPTR)
118
+ (*local)(handle);
119
+ #endif /* NVTX_DISABLE */
120
+ }
121
+
122
+ #ifdef __cplusplus
123
+ } /* extern "C" */
124
+ #endif /* __cplusplus */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInit.h ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ /* ---- Platform-independent helper definitions and functions ---- */
36
+
37
+ /* Prefer macros over inline functions to reduce symbol resolution at link time */
38
+
39
/* Platform abstraction used by the NVTX loader:
 *   NVTX_PATHCHAR / NVTX_STR / NVTX_GETENV  - character type, literal prefix and
 *                                             getenv flavor used for library paths
 *   NVTX_BUFSIZE                            - size (in NVTX_PATHCHAR elements) of path buffers
 *   NVTX_DLLHANDLE / NVTX_DLLOPEN / NVTX_DLLFUNC / NVTX_DLLCLOSE / NVTX_DLLDEFAULT
 *                                           - dynamic-library handle type and operations
 *   NVTX_YIELD / NVTX_MEMBAR                - thread yield and full memory barrier
 *   NVTX_ATOMIC_WRITE_32 / NVTX_ATOMIC_CAS_32
 *                                           - 32-bit store and compare-and-swap; the CAS
 *                                             assigns the previous value to `old`
 */
#if defined(_WIN32)
#define NVTX_PATHCHAR   wchar_t
#define NVTX_STR(x)     L##x
#define NVTX_GETENV     _wgetenv
#define NVTX_BUFSIZE    16384
#define NVTX_DLLHANDLE  HMODULE
#define NVTX_DLLOPEN(x) LoadLibraryW(x)
#define NVTX_DLLFUNC(h, x) NVTX_REINTERPRET_CAST(void(*)(void), GetProcAddress((h), (x)))
#define NVTX_DLLCLOSE   FreeLibrary
#define NVTX_DLLDEFAULT NVTX_NULLPTR
#define NVTX_YIELD()    SwitchToThread()
#define NVTX_MEMBAR()   MemoryBarrier()
#define NVTX_ATOMIC_WRITE_32(address, value) \
    InterlockedExchange(NVTX_REINTERPRET_CAST(volatile LONG*, (address)), (value))
#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) \
    (old) = InterlockedCompareExchange(NVTX_REINTERPRET_CAST(volatile LONG*, (address)), (exchange), (comparand))
#elif defined(__GNUC__)
#define NVTX_PATHCHAR   char
#define NVTX_STR(x)     x
#define NVTX_GETENV     getenv
#define NVTX_BUFSIZE    16384
#define NVTX_DLLHANDLE  void*
#define NVTX_DLLOPEN(x) dlopen((x), RTLD_LAZY)
#define NVTX_DLLFUNC(h, x) dlsym((h), (x))
#define NVTX_DLLCLOSE   dlclose
#if !defined(__APPLE__)
#define NVTX_DLLDEFAULT NVTX_NULLPTR
#else
#define NVTX_DLLDEFAULT RTLD_DEFAULT
#endif
#define NVTX_YIELD()    sched_yield()
#define NVTX_MEMBAR()   __sync_synchronize()
/* Ensure full memory barrier for atomics, to match Windows functions.
 * The three statements are wrapped in do/while(0) so the macro behaves as a
 * single statement: without the wrapper, using it as the body of an unbraced
 * `if`/`else`/loop would make only the first barrier conditional and perform
 * the store unconditionally. */
#define NVTX_ATOMIC_WRITE_32(address, value) \
    do { __sync_synchronize(); *(address) = (value); __sync_synchronize(); } while (0)
#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) \
    (old) = __sync_val_compare_and_swap((address), (comparand), (exchange))
#else
#error The library does not support your configuration!
#endif
79
+
80
+ /* NVTX_LOAD_SEQUENCE_VERSION macro
81
+ *
82
+ * NVTX3 can update the search sequence used for finding a suitable injection library.
83
+ * If multiple copies of the NVTX3 headers are included in the same translation unit,
84
+ * the one included first sets the loader sequence. If there is any problem where a
85
+ * tool is expected to load, but is not loading, the app can test this macro to verify
86
+ * which version of the search is being used. Check if NVTX_LOAD_SEQUENCE_VERSION is
87
+ * defined; if it is not, the version is 1. Otherwise, the version is indicated by
88
+ * the value of NVTX_LOAD_SEQUENCE_VERSION.
89
+ *
90
+ * Version history:
91
+ * 1: NVTX3 initial implementation. The search continues until a usable function
92
+ * pointer is found. If none is found, init aborts and rolls back anything it
93
+ * did during the search (e.g. any loaded libraries are unloaded). If a non-zero
94
+ * function pointer is found, it is called. If that function returns non-zero
95
+ * ("true" in C), that indicates a tool successfully initialized. If it returns
96
+ * zero ("false"), the tool init was unsuccessful, so init aborts and rolls back
97
+ * anything it did. No further attempt is made to search for a different init
98
+ * function if the first one found returns false. The search order is:
99
+ * - Check for env var NVTX_INJECTION64_PATH (or "32" in 32-bit process)
100
+ * - Treat env var value as path to dynamic library, try loading it
101
+ * - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
102
+ * - If this returns a non-null pointer, the search finishes here
103
+ * - (Android only) Look for libNvtxInjection64.so (or "32" in 32-bit process)
104
+ * - Must be in the "/data/data/<package name>/files" directory
105
+ * - Treat this file as a dynamic library, try loading it
106
+ * - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
107
+ * - If this returns a non-null pointer, the search finishes here
108
+ * Note: There were two other options partially implemented, but disabled.
109
+ * - For supporting a pre-injected library on POSIX platforms, e.g. with
110
+ * LD_PRELOAD, try using dlsym with a null module handle to get the init
111
+ * function. This was unconditionally disabled after finding cases where
112
+ * a tool loaded multiple injections that supported NVTX, and couldn't
113
+ * control which one was getting picked by the NVTX loader.
114
+ * - (Linux only, not including Cygwin) Check for static injection using a
115
+ * weak symbol. This was implemented incorrectly, so it wasn't usable.
116
+ *
117
+ * 2: Fix the support for static injection libraries. This is meant for cases
118
+ * where dlopen is not supported or allowed, and the executable format has
119
+ * support for weak symbols. Tools may provide a static library with a
120
+ * C-linkage symbol named "InitializeInjectionNvtx2_fnptr", whose type is
121
+ * NvtxInitializeInjectionNvtxFunc_t, i.e. a function pointer to NVTX init
122
+ * function. If such a symbol is provided by a static library, the NVTX
123
+ * loader's weak symbol will bind to it and call it for initialization.
124
+ * Otherwise, the weak symbol will be defined by NVTX and default to null,
125
+ * indicating no static injection library is present. Static injection is
126
+ * last in the load sequence, because it gives all the run-time methods of
127
+ * injection to override a program's compiled-in tool without rebuilding the
128
+ * program. The search order is:
129
+ * - Check for env var NVTX_INJECTION64_PATH (or "32" in 32-bit process)
130
+ * - Treat env var value as path to dynamic library, try loading it
131
+ * - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
132
+ * - If this returns a non-null pointer, the search finishes here
133
+ * - (Android only) Look for libNvtxInjection64.so (or "32" in 32-bit process)
134
+ * - Must be in the "/data/data/<package name>/files" directory
135
+ * - Treat this file as a dynamic library, try loading it
136
+ * - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
137
+ * - If this returns a non-null pointer, the search finishes here
138
+ * - (Currently disabled, experimental support for non-Windows) Use dlsym
139
+ * with a null module handle to query the process-wide dynamic symbol
140
+ * table for a function named "InitializeInjectionNvtx2Preinject". The
141
+ * symbol is different to prevent injections from being loaded this way
142
+ * unless they choose to do so.
143
+ * - If this returns a non-null pointer, the search finishes here
144
+ * - (GCC-like compilers with ELF binary targets only) Check for static
145
+ * injection using a weak symbol "InitializeInjectionNvtx2_fnptr".
146
+ * If the default support choices in this header are not working as expected,
147
+ * clients may now override load sequence support decisions by defining these
148
+ * macros before including the NVTX header files:
149
+ * - NVTX_SUPPORT_ENV_VARS
150
+ * - NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
151
+ * - NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
152
+ * - NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
153
+ * - NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
154
+ */
155
+ #define NVTX_LOAD_SEQUENCE_VERSION 2
156
+
157
/* Defaults for the load-sequence feature switches. Each one is defined here only
 * if the client has not already defined it before including the NVTX headers. */

/* Set to 1 on platforms where pre-injected libraries can be discovered. */
#if !defined(NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY)
#  if defined(_WIN32)
     /* Windows has no process-wide table of dynamic library symbols, so this
      * can't be supported. */
#    define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
#  else
     /* POSIX platforms allow calling dlsym on a null module to use the
      * process-wide table. Note: Still disabled in load sequence version 2.
      * Needs to support following the RTLD_NEXT chain, and needs more testing
      * before support can be enabled by default. */
#    define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
#  endif
#endif

/* Set to 1 on platforms that support environment variables.
 * TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0.
 * Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
#if !defined(NVTX_SUPPORT_ENV_VARS)
#  define NVTX_SUPPORT_ENV_VARS 1
#endif

/* Set to 1 on platforms that support dynamic/shared libraries. */
#if !defined(NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY)
#  define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
#endif

/* Enabled exactly when compiling for Android (__ANDROID__ defined). */
#if !defined(NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE)
#  if defined(__ANDROID__)
#    define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 1
#  else
#    define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 0
#  endif
#endif

/* On platforms that support weak symbols (i.e. non-Windows), injection libraries
 * may be statically linked into an application. This is useful for platforms
 * where dynamic injection is not available. Weak symbols not marked extern are
 * definitions, not just declarations. They are guaranteed to be initialized to
 * zero if no normal definitions are found by the linker to override them. This
 * means the NVTX load sequence can safely detect the presence of a static
 * injection -- if InitializeInjectionNvtx2_fnptr is zero, there is no static
 * injection. */
#if !defined(NVTX_SUPPORT_STATIC_INJECTION_LIBRARY)
#  if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
#    define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
#  else
#    define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
#  endif
#endif
204
+
205
#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
#ifndef NVTX_STATIC_INJECTION_IMPL
/* Weak definition of the static-injection hook, emitted unless the including
 * code defines NVTX_STATIC_INJECTION_IMPL.
 *
 * For an NVTX injection library to support static injection, it must:
 * - Define InitializeInjectionNvtx2_fnptr as a normal (non-weak) symbol pointing
 *   to its implementation of InitializeInjectionNvtx2 (which need not be a
 *   dynamic export if only static injection is supported).
 * - Define NVTX_STATIC_INJECTION_IMPL so the weak definition below is skipped.
 * - Compile the static injection files with -fPIC if they are to be linked with
 *   other files compiled this way. If you forget this, GCC will simply tell you
 *   to add it.
 *
 * When building the application, there are a few ways to link in a static
 * injection:
 * - Compile the injection's source files normally, and include the .o files as
 *   inputs to the linker.
 * - If the injection is provided as an archive (.a file), it will not resolve
 *   any unresolved symbols, so the linker will skip it by default. This can be
 *   fixed by wrapping the static injection's name on the linker command line
 *   with options to treat it differently. For example:
 *     gcc example.o libfoo.a -Wl,--whole-archive libinj-static.a -Wl,--no-whole-archive libbar.a
 *   Note that libinj-static.a is bracketed by options to turn on "whole archive"
 *   and then back off again afterwards, so libfoo.a and libbar.a are linked
 *   normally.
 * - In CMake, a static injection can be added with options like this:
 *     target_link_libraries(app PRIVATE -Wl,--whole-archive inj-static -Wl,--no-whole-archive)
 */
__attribute__((weak)) NvtxInitializeInjectionNvtxFunc_t InitializeInjectionNvtx2_fnptr;
#endif /* NVTX_STATIC_INJECTION_IMPL */
#endif /* NVTX_SUPPORT_STATIC_INJECTION_LIBRARY */
228
+
229
+ /* This function tries to find or load an NVTX injection library and get the
230
+ * address of its InitializeInjection2 function. If such a function pointer
231
+ * is found, it is called, and passed the address of this NVTX instance's
232
+ * nvtxGetExportTable function, so the injection can attach to this instance.
233
+ * If the initialization fails for any reason, any dynamic library loaded will
234
+ * be freed, and all NVTX implementation functions will be set to no-ops. If
235
+ * initialization succeeds, NVTX functions not attached to the tool will be set
236
+ * to no-ops. This is implemented as one function instead of several small
237
+ * functions to minimize the number of weak symbols the linker must resolve.
238
+ * Order of search is:
239
+ * - Pre-injected library exporting InitializeInjectionNvtx2
240
+ * - Loadable library exporting InitializeInjectionNvtx2
241
+ * - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
242
+ * - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
243
+ * - Statically-linked injection library defining InitializeInjectionNvtx2_fnptr
244
+ */
245
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void);
246
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void)
247
+ {
248
+ static const char initFuncName[] = "InitializeInjectionNvtx2";
249
+ #if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
250
+ static const char initFuncPreinjectName[] = "InitializeInjectionNvtx2Preinject";
251
+ #endif
252
+ NvtxInitializeInjectionNvtxFunc_t init_fnptr = NVTX_NULLPTR;
253
+ NVTX_DLLHANDLE injectionLibraryHandle = NVTX_DLLDEFAULT;
254
+ int entryPointStatus = 0;
255
+
256
+ #if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
257
+ /* Try discovering dynamic injection library to load */
258
+ {
259
+ #if NVTX_SUPPORT_ENV_VARS
260
+ /* If env var NVTX_INJECTION64_PATH is set, it should contain the path
261
+ * to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
262
+ const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
263
+ ? NVTX_STR("NVTX_INJECTION32_PATH")
264
+ : NVTX_STR("NVTX_INJECTION64_PATH");
265
+ #endif /* NVTX_SUPPORT_ENV_VARS */
266
+ NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
267
+ const NVTX_PATHCHAR* injectionLibraryPath = NVTX_NULLPTR;
268
+
269
+ /* Refer to this variable explicitly in case all references to it are #if'ed out */
270
+ (void)injectionLibraryPathBuf;
271
+
272
+ #if NVTX_SUPPORT_ENV_VARS
273
+ /* Disable the warning for getenv & _wgetenv -- this usage is safe because
274
+ * these functions are not called again before using the returned value. */
275
+ #if defined(_MSC_VER)
276
+ #pragma warning( push )
277
+ #pragma warning( disable : 4996 )
278
+ #endif
279
+ injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
280
+ #if defined(_MSC_VER)
281
+ #pragma warning( pop )
282
+ #endif
283
+ #endif
284
+
285
+ #if NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
286
+ if (!injectionLibraryPath)
287
+ {
288
+ const char *bits = (sizeof(void*) == 4) ? "32" : "64";
289
+ char cmdlineBuf[32];
290
+ char pkgName[PATH_MAX];
291
+ int count;
292
+ int pid;
293
+ FILE *fp;
294
+ size_t bytesRead;
295
+ size_t pos;
296
+
297
+ pid = NVTX_STATIC_CAST(int, getpid());
298
+ count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
299
+ if (count <= 0 || count >= NVTX_STATIC_CAST(int, sizeof(cmdlineBuf)))
300
+ {
301
+ NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
302
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
303
+ }
304
+
305
+ fp = fopen(cmdlineBuf, "r");
306
+ if (!fp)
307
+ {
308
+ NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
309
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
310
+ }
311
+
312
+ bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
313
+ fclose(fp);
314
+ if (bytesRead == 0)
315
+ {
316
+ NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
317
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
318
+ }
319
+
320
+ pkgName[bytesRead] = 0;
321
+
322
+ /* String can contain colon as a process separator. In this case the package name is before the colon. */
323
+ pos = 0;
324
+ while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
325
+ {
326
+ ++pos;
327
+ }
328
+ pkgName[pos] = 0;
329
+
330
+ count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
331
+ if (count <= 0 || count >= NVTX_BUFSIZE)
332
+ {
333
+ NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
334
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
335
+ }
336
+
337
+ /* On Android, verify path is accessible due to aggressive file access restrictions. */
338
+ /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
339
+ /* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
340
+ if (injectionLibraryPathBuf[0] == '/')
341
+ {
342
+ #if (__ANDROID_API__ < 21)
343
+ int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
344
+ #else
345
+ int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
346
+ #endif
347
+ if (access_err != 0)
348
+ {
349
+ NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
350
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
351
+ }
352
+ }
353
+ injectionLibraryPath = injectionLibraryPathBuf;
354
+ }
355
+ #endif /* NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE */
356
+
357
+ /* At this point, injectionLibraryPath is specified if a dynamic
358
+ * injection library was specified by a tool. */
359
+ if (injectionLibraryPath)
360
+ {
361
+ /* Load the injection library */
362
+ injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
363
+ if (!injectionLibraryHandle)
364
+ {
365
+ NVTX_ERR("Failed to load injection library\n");
366
+ return NVTX_ERR_INIT_LOAD_LIBRARY;
367
+ }
368
+ else
369
+ {
370
+ /* Attempt to get the injection library's entry-point */
371
+ init_fnptr = NVTX_REINTERPRET_CAST(NvtxInitializeInjectionNvtxFunc_t, NVTX_DLLFUNC(injectionLibraryHandle, initFuncName));
372
+ if (!init_fnptr)
373
+ {
374
+ NVTX_DLLCLOSE(injectionLibraryHandle);
375
+ NVTX_ERR("Failed to get address of function InitializeInjectionNvtx2 from injection library\n");
376
+ return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
377
+ }
378
+ }
379
+ }
380
+ }
381
+ #endif /* NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY */
382
+
383
+ #if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
384
+ if (!init_fnptr)
385
+ {
386
+ /* Use POSIX global symbol chain to query for init function from any module */
387
+ init_fnptr = NVTX_REINTERPRET_CAST(NvtxInitializeInjectionNvtxFunc_t, NVTX_DLLFUNC(NVTX_DLLDEFAULT, initFuncPreinjectName));
388
+ }
389
+ #endif
390
+
391
+ #if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
392
+ if (!init_fnptr)
393
+ {
394
+ /* Check weakly-defined function pointer. A statically-linked injection can define this
395
+ * as a normal symbol and set it to the address of the NVTX init function -- this will
396
+ * provide a non-null value here. If there is no other definition of this symbol, it
397
+ * will be null here. */
398
+ if (InitializeInjectionNvtx2_fnptr)
399
+ {
400
+ init_fnptr = InitializeInjectionNvtx2_fnptr;
401
+ }
402
+ }
403
+ #endif
404
+
405
+ /* At this point, if init_fnptr is not set, then no tool has specified
406
+ * an NVTX injection library -- return non-success result so all NVTX
407
+ * API functions will be set to no-ops. */
408
+ if (!init_fnptr)
409
+ {
410
+ return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
411
+ }
412
+
413
+ /* Invoke injection library's initialization function. If it returns
414
+ * 0 (failure) and a dynamic injection was loaded, unload it. */
415
+ entryPointStatus = init_fnptr(NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable));
416
+ if (entryPointStatus == 0)
417
+ {
418
+ NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
419
+ if (injectionLibraryHandle)
420
+ {
421
+ NVTX_DLLCLOSE(injectionLibraryHandle);
422
+ }
423
+ return NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT;
424
+ }
425
+
426
+ return NVTX_SUCCESS;
427
+ }
428
+
429
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void)
430
+ {
431
+ unsigned int old;
432
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState == NVTX_INIT_STATE_COMPLETE)
433
+ {
434
+ return;
435
+ }
436
+
437
+ NVTX_ATOMIC_CAS_32(
438
+ old,
439
+ &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
440
+ NVTX_INIT_STATE_STARTED,
441
+ NVTX_INIT_STATE_FRESH);
442
+ if (old == NVTX_INIT_STATE_FRESH)
443
+ {
444
+ int result;
445
+ int forceAllToNoops;
446
+
447
+ /* Load & initialize injection library -- it will assign the function pointers */
448
+ result = NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)();
449
+
450
+ /* Set all pointers not assigned by the injection to null */
451
+ forceAllToNoops = result != NVTX_SUCCESS; /* Set all to null if injection init failed */
452
+ NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(forceAllToNoops);
453
+
454
+ /* Signal that initialization has finished, so now the assigned function pointers will be used */
455
+ NVTX_ATOMIC_WRITE_32(
456
+ &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
457
+ NVTX_INIT_STATE_COMPLETE);
458
+ }
459
+ else /* Spin-wait until initialization has finished */
460
+ {
461
+ NVTX_MEMBAR();
462
+ while (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState != NVTX_INIT_STATE_COMPLETE)
463
+ {
464
+ NVTX_YIELD();
465
+ NVTX_MEMBAR();
466
+ }
467
+ }
468
+ }
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDecls.h ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
36
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message);
37
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message);
38
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
39
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message);
40
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message);
41
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id);
42
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
43
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message);
44
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message);
45
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void);
46
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name);
47
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name);
48
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name);
49
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name);
50
+
51
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name);
52
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name);
53
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name);
54
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name);
55
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name);
56
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name);
57
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name);
58
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name);
59
+
60
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name);
61
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name);
62
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name);
63
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name);
64
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name);
65
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name);
66
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name);
67
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name);
68
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name);
69
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name);
70
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name);
71
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name);
72
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name);
73
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name);
74
+
75
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name);
76
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name);
77
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name);
78
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name);
79
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name);
80
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name);
81
+
82
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
83
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
84
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
85
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
86
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain);
87
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
88
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource);
89
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
90
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
91
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string);
92
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string);
93
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message);
94
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message);
95
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain);
96
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved);
97
+
98
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtx_nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs);
99
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtx_nvtxSyncUser_t handle);
100
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtx_nvtxSyncUser_t handle);
101
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtx_nvtxSyncUser_t handle);
102
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtx_nvtxSyncUser_t handle);
103
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtx_nvtxSyncUser_t handle);
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDefs.h ADDED
@@ -0,0 +1,595 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #ifndef NVTX_IMPL_GUARD
22
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
23
+ #endif
24
+
25
+ #if defined(NVTX_AS_SYSTEM_HEADER)
26
+ #if defined(__clang__)
27
+ #pragma clang system_header
28
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
29
+ #pragma GCC system_header
30
+ #elif defined(_MSC_VER)
31
+ #pragma system_header
32
+ #endif
33
+ #endif
34
+
35
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
36
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
37
+ nvtxMarkEx(eventAttrib);
38
+ }
39
+
40
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message){
41
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
42
+ nvtxMarkA(message);
43
+ }
44
+
45
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message){
46
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
47
+ nvtxMarkW(message);
48
+ }
49
+
50
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
51
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
52
+ return nvtxRangeStartEx(eventAttrib);
53
+ }
54
+
55
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message){
56
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
57
+ return nvtxRangeStartA(message);
58
+ }
59
+
60
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message){
61
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
62
+ return nvtxRangeStartW(message);
63
+ }
64
+
65
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id){
66
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
67
+ nvtxRangeEnd(id);
68
+ }
69
+
70
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
71
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
72
+ return nvtxRangePushEx(eventAttrib);
73
+ }
74
+
75
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message){
76
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
77
+ return nvtxRangePushA(message);
78
+ }
79
+
80
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message){
81
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
82
+ return nvtxRangePushW(message);
83
+ }
84
+
85
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void){
86
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
87
+ return nvtxRangePop();
88
+ }
89
+
90
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name){
91
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
92
+ nvtxNameCategoryA(category, name);
93
+ }
94
+
95
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name){
96
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
97
+ nvtxNameCategoryW(category, name);
98
+ }
99
+
100
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name){
101
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
102
+ nvtxNameOsThreadA(threadId, name);
103
+ }
104
+
105
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name){
106
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
107
+ nvtxNameOsThreadW(threadId, name);
108
+ }
109
+
110
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
111
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
112
+ nvtxDomainMarkEx(domain, eventAttrib);
113
+ }
114
+
115
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
116
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
117
+ return nvtxDomainRangeStartEx(domain, eventAttrib);
118
+ }
119
+
120
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id){
121
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
122
+ nvtxDomainRangeEnd(domain, id);
123
+ }
124
+
125
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
126
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
127
+ return nvtxDomainRangePushEx(domain, eventAttrib);
128
+ }
129
+
130
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain){
131
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
132
+ return nvtxDomainRangePop(domain);
133
+ }
134
+
135
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs){
136
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
137
+ return nvtxDomainResourceCreate(domain, attribs);
138
+ }
139
+
140
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource){
141
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
142
+ nvtxDomainResourceDestroy(resource);
143
+ }
144
+
145
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name){
146
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
147
+ nvtxDomainNameCategoryA(domain, category, name);
148
+ }
149
+
150
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name){
151
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
152
+ nvtxDomainNameCategoryW(domain, category, name);
153
+ }
154
+
155
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string){
156
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
157
+ return nvtxDomainRegisterStringA(domain, string);
158
+ }
159
+
160
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string){
161
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
162
+ return nvtxDomainRegisterStringW(domain, string);
163
+ }
164
+
165
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message){
166
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
167
+ return nvtxDomainCreateA(message);
168
+ }
169
+
170
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message){
171
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
172
+ return nvtxDomainCreateW(message);
173
+ }
174
+
175
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain){
176
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
177
+ nvtxDomainDestroy(domain);
178
+ }
179
+
180
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved){
181
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
182
+ nvtxInitialize(reserved);
183
+ }
184
+
185
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name){
186
+ nvtxNameCuDeviceA_fakeimpl_fntype local;
187
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
188
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
189
+ if (local)
190
+ local(device, name);
191
+ }
192
+
193
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name){
194
+ nvtxNameCuDeviceW_fakeimpl_fntype local;
195
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
196
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
197
+ if (local)
198
+ local(device, name);
199
+ }
200
+
201
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name){
202
+ nvtxNameCuContextA_fakeimpl_fntype local;
203
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
204
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
205
+ if (local)
206
+ local(context, name);
207
+ }
208
+
209
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name){
210
+ nvtxNameCuContextW_fakeimpl_fntype local;
211
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
212
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
213
+ if (local)
214
+ local(context, name);
215
+ }
216
+
217
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name){
218
+ nvtxNameCuStreamA_fakeimpl_fntype local;
219
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
220
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
221
+ if (local)
222
+ local(stream, name);
223
+ }
224
+
225
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name){
226
+ nvtxNameCuStreamW_fakeimpl_fntype local;
227
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
228
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
229
+ if (local)
230
+ local(stream, name);
231
+ }
232
+
233
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name){
234
+ nvtxNameCuEventA_fakeimpl_fntype local;
235
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
236
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
237
+ if (local)
238
+ local(event, name);
239
+ }
240
+
241
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name){
242
+ nvtxNameCuEventW_fakeimpl_fntype local;
243
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
244
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
245
+ if (local)
246
+ local(event, name);
247
+ }
248
+
249
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name){
250
+ nvtxNameCudaDeviceA_fakeimpl_fntype local;
251
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
252
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
253
+ if (local)
254
+ local(device, name);
255
+ }
256
+
257
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name){
258
+ nvtxNameCudaDeviceW_fakeimpl_fntype local;
259
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
260
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
261
+ if (local)
262
+ local(device, name);
263
+ }
264
+
265
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name){
266
+ nvtxNameCudaStreamA_fakeimpl_fntype local;
267
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
268
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
269
+ if (local)
270
+ local(stream, name);
271
+ }
272
+
273
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name){
274
+ nvtxNameCudaStreamW_fakeimpl_fntype local;
275
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
276
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
277
+ if (local)
278
+ local(stream, name);
279
+ }
280
+
281
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name){
282
+ nvtxNameCudaEventA_fakeimpl_fntype local;
283
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
284
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
285
+ if (local)
286
+ local(event, name);
287
+ }
288
+
289
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name){
290
+ nvtxNameCudaEventW_fakeimpl_fntype local;
291
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
292
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
293
+ if (local)
294
+ local(event, name);
295
+ }
296
+
297
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name){
298
+ nvtxNameClDeviceA_fakeimpl_fntype local;
299
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
300
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
301
+ if (local)
302
+ local(device, name);
303
+ }
304
+
305
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name){
306
+ nvtxNameClDeviceW_fakeimpl_fntype local;
307
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
308
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
309
+ if (local)
310
+ local(device, name);
311
+ }
312
+
313
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name){
314
+ nvtxNameClContextA_fakeimpl_fntype local;
315
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
316
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
317
+ if (local)
318
+ local(context, name);
319
+ }
320
+
321
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name){
322
+ nvtxNameClContextW_fakeimpl_fntype local;
323
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
324
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
325
+ if (local)
326
+ local(context, name);
327
+ }
328
+
329
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name){
330
+ nvtxNameClCommandQueueA_fakeimpl_fntype local;
331
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
332
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
333
+ if (local)
334
+ local(command_queue, name);
335
+ }
336
+
337
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name){
338
+ nvtxNameClCommandQueueW_fakeimpl_fntype local;
339
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
340
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
341
+ if (local)
342
+ local(command_queue, name);
343
+ }
344
+
345
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name){
346
+ nvtxNameClMemObjectA_fakeimpl_fntype local;
347
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
348
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
349
+ if (local)
350
+ local(memobj, name);
351
+ }
352
+
353
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name){
354
+ nvtxNameClMemObjectW_fakeimpl_fntype local;
355
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
356
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
357
+ if (local)
358
+ local(memobj, name);
359
+ }
360
+
361
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name){
362
+ nvtxNameClSamplerA_fakeimpl_fntype local;
363
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
364
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
365
+ if (local)
366
+ local(sampler, name);
367
+ }
368
+
369
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name){
370
+ nvtxNameClSamplerW_fakeimpl_fntype local;
371
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
372
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
373
+ if (local)
374
+ local(sampler, name);
375
+ }
376
+
377
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name){
378
+ nvtxNameClProgramA_fakeimpl_fntype local;
379
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
380
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
381
+ if (local)
382
+ local(program, name);
383
+ }
384
+
385
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name){
386
+ nvtxNameClProgramW_fakeimpl_fntype local;
387
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
388
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
389
+ if (local)
390
+ local(program, name);
391
+ }
392
+
393
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name){
394
+ nvtxNameClEventA_fakeimpl_fntype local;
395
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
396
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
397
+ if (local)
398
+ local(evnt, name);
399
+ }
400
+
401
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name){
402
+ nvtxNameClEventW_fakeimpl_fntype local;
403
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
404
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
405
+ if (local)
406
+ local(evnt, name);
407
+ }
408
+
409
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtx_nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs){
410
+ nvtxDomainSyncUserCreate_fakeimpl_fntype local;
411
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
412
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
413
+ if (local) {
414
+ return local(domain, attribs);
415
+ }
416
+ return NVTX_NULLPTR;
417
+ }
418
+
419
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtx_nvtxSyncUser_t handle){
420
+ nvtxDomainSyncUserDestroy_fakeimpl_fntype local;
421
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
422
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
423
+ if (local)
424
+ local(handle);
425
+ }
426
+
427
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtx_nvtxSyncUser_t handle){
428
+ nvtxDomainSyncUserAcquireStart_fakeimpl_fntype local;
429
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
430
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
431
+ if (local)
432
+ local(handle);
433
+ }
434
+
435
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtx_nvtxSyncUser_t handle){
436
+ nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype local;
437
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
438
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
439
+ if (local)
440
+ local(handle);
441
+ }
442
+
443
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtx_nvtxSyncUser_t handle){
444
+ nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype local;
445
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
446
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
447
+ if (local)
448
+ local(handle);
449
+ }
450
+
451
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtx_nvtxSyncUser_t handle){
452
+ nvtxDomainSyncUserReleasing_fakeimpl_fntype local;
453
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
454
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
455
+ if (local)
456
+ local(handle);
457
+ }
458
+
459
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops);
460
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops)
461
+ {
462
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init) || forceAllToNoops)
463
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NVTX_NULLPTR;
464
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init) || forceAllToNoops)
465
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NVTX_NULLPTR;
466
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init) || forceAllToNoops)
467
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NVTX_NULLPTR;
468
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init) || forceAllToNoops)
469
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NVTX_NULLPTR;
470
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init) || forceAllToNoops)
471
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NVTX_NULLPTR;
472
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init) || forceAllToNoops)
473
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NVTX_NULLPTR;
474
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init) || forceAllToNoops)
475
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NVTX_NULLPTR;
476
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init) || forceAllToNoops)
477
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NVTX_NULLPTR;
478
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init) || forceAllToNoops)
479
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NVTX_NULLPTR;
480
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init) || forceAllToNoops)
481
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NVTX_NULLPTR;
482
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init) || forceAllToNoops)
483
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NVTX_NULLPTR;
484
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init) || forceAllToNoops)
485
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NVTX_NULLPTR;
486
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init) || forceAllToNoops)
487
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NVTX_NULLPTR;
488
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init) || forceAllToNoops)
489
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NVTX_NULLPTR;
490
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init) || forceAllToNoops)
491
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NVTX_NULLPTR;
492
+
493
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init) || forceAllToNoops)
494
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NVTX_NULLPTR;
495
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init) || forceAllToNoops)
496
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NVTX_NULLPTR;
497
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init) || forceAllToNoops)
498
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NVTX_NULLPTR;
499
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init) || forceAllToNoops)
500
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NVTX_NULLPTR;
501
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init) || forceAllToNoops)
502
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NVTX_NULLPTR;
503
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init) || forceAllToNoops)
504
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NVTX_NULLPTR;
505
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init) || forceAllToNoops)
506
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NVTX_NULLPTR;
507
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init) || forceAllToNoops)
508
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NVTX_NULLPTR;
509
+
510
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init) || forceAllToNoops)
511
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NVTX_NULLPTR;
512
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init) || forceAllToNoops)
513
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NVTX_NULLPTR;
514
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init) || forceAllToNoops)
515
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NVTX_NULLPTR;
516
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init) || forceAllToNoops)
517
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NVTX_NULLPTR;
518
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init) || forceAllToNoops)
519
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NVTX_NULLPTR;
520
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init) || forceAllToNoops)
521
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NVTX_NULLPTR;
522
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init) || forceAllToNoops)
523
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NVTX_NULLPTR;
524
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init) || forceAllToNoops)
525
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NVTX_NULLPTR;
526
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init) || forceAllToNoops)
527
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NVTX_NULLPTR;
528
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init) || forceAllToNoops)
529
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NVTX_NULLPTR;
530
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init) || forceAllToNoops)
531
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NVTX_NULLPTR;
532
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init) || forceAllToNoops)
533
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NVTX_NULLPTR;
534
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init) || forceAllToNoops)
535
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NVTX_NULLPTR;
536
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init) || forceAllToNoops)
537
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NVTX_NULLPTR;
538
+
539
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init) || forceAllToNoops)
540
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NVTX_NULLPTR;
541
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init) || forceAllToNoops)
542
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NVTX_NULLPTR;
543
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init) || forceAllToNoops)
544
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NVTX_NULLPTR;
545
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init) || forceAllToNoops)
546
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NVTX_NULLPTR;
547
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init) || forceAllToNoops)
548
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NVTX_NULLPTR;
549
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init) || forceAllToNoops)
550
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NVTX_NULLPTR;
551
+
552
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init) || forceAllToNoops)
553
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NVTX_NULLPTR;
554
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init) || forceAllToNoops)
555
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NVTX_NULLPTR;
556
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init) || forceAllToNoops)
557
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NVTX_NULLPTR;
558
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init) || forceAllToNoops)
559
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NVTX_NULLPTR;
560
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init) || forceAllToNoops)
561
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NVTX_NULLPTR;
562
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init) || forceAllToNoops)
563
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NVTX_NULLPTR;
564
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init) || forceAllToNoops)
565
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NVTX_NULLPTR;
566
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init) || forceAllToNoops)
567
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NVTX_NULLPTR;
568
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init) || forceAllToNoops)
569
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NVTX_NULLPTR;
570
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init) || forceAllToNoops)
571
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NVTX_NULLPTR;
572
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init) || forceAllToNoops)
573
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NVTX_NULLPTR;
574
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init) || forceAllToNoops)
575
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NVTX_NULLPTR;
576
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init) || forceAllToNoops)
577
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NVTX_NULLPTR;
578
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init) || forceAllToNoops)
579
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NVTX_NULLPTR;
580
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init) || forceAllToNoops)
581
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NVTX_NULLPTR;
582
+
583
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init) || forceAllToNoops)
584
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NVTX_NULLPTR;
585
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init) || forceAllToNoops)
586
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr = NVTX_NULLPTR;
587
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init) || forceAllToNoops)
588
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NVTX_NULLPTR;
589
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init) || forceAllToNoops)
590
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NVTX_NULLPTR;
591
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init) || forceAllToNoops)
592
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NVTX_NULLPTR;
593
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init) || forceAllToNoops)
594
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NVTX_NULLPTR;
595
+ }
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxLinkOnce.h ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ #if defined(NVTX_AS_SYSTEM_HEADER)
22
+ #if defined(__clang__)
23
+ #pragma clang system_header
24
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
25
+ #pragma GCC system_header
26
+ #elif defined(_MSC_VER)
27
+ #pragma system_header
28
+ #endif
29
+ #endif
30
+
31
+ #ifndef __NVTX_LINKONCE_H__
32
+ #define __NVTX_LINKONCE_H__
33
+
34
+ /* This header defines macros to permit making definitions of global variables
35
+ * and functions in C/C++ header files which may be included multiple times in
36
+ * a translation unit or linkage unit. It allows authoring header-only libraries
37
+ * which can be used by multiple other header-only libraries (either as the same
38
+ * copy or multiple copies), and does not require any build changes, such as
39
+ * adding another .c file, linking a static library, or deploying a dynamic
40
+ * library. Globals defined with these macros have the property that they have
41
+ * the same address, pointing to a single instance, for the entire linkage unit.
42
+ * It is expected but not guaranteed that each linkage unit will have a separate
43
+ * instance.
44
+ *
45
+ * In some situations it is desirable to declare a variable without initializing
46
+ * it, refer to it in code or other variables' initializers, and then initialize
47
+ * it later. Similarly, functions can be prototyped, have their address taken,
48
+ * and then have their body defined later. In such cases, use the FWDDECL macros
49
+ * when forward-declaring LINKONCE global variables without initializers and
50
+ * function prototypes, and then use the DEFINE macros when later defining them.
51
+ * Although in many cases the FWDDECL macro is equivalent to the DEFINE macro,
52
+ * following this pattern makes code maximally portable.
53
+ */
54
+
55
+ #if defined(_MSC_VER) /* MSVC */
56
+ #if defined(__cplusplus)
57
+ #define NVTX_LINKONCE_DEFINE_GLOBAL extern "C" __declspec(selectany)
58
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
59
+ #else
60
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
61
+ #define NVTX_LINKONCE_DEFINE_FUNCTION __inline
62
+ #endif
63
+ #define NVTX_LINKONCE_FWDDECL_GLOBAL NVTX_LINKONCE_DEFINE_GLOBAL extern
64
+ #elif defined(_WIN32) || defined(__CYGWIN__) /* MinGW */
65
+ #if defined(__cplusplus)
66
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
67
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
68
+ #else
69
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
70
+ #define NVTX_LINKONCE_DEFINE_FUNCTION
71
+ #endif
72
+ #define NVTX_LINKONCE_FWDDECL_GLOBAL extern
73
+ #else /* All others: Assume GCC, clang, or compatible */
74
+ #define NVTX_LINKONCE_WEAK __attribute__((weak))
75
+ #define NVTX_LINKONCE_HIDDEN __attribute__((visibility("hidden")))
76
+ #if defined(__cplusplus)
77
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
78
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_HIDDEN inline
79
+ #else
80
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
81
+ #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
82
+ #endif
83
+ #define NVTX_LINKONCE_FWDDECL_GLOBAL NVTX_LINKONCE_DEFINE_GLOBAL extern
84
+ #endif
85
+
86
+ #define NVTX_LINKONCE_FWDDECL_FUNCTION NVTX_LINKONCE_DEFINE_FUNCTION
87
+
88
+ #endif /* __NVTX_LINKONCE_H__ */
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxTypes.h ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ /* This header defines types which are used by the internal implementation
22
+ * of NVTX and callback subscribers. API clients do not use these types,
23
+ * so they are defined here instead of in nvToolsExt.h to clarify they are
24
+ * not part of the NVTX client API. */
25
+
26
+ #ifndef NVTX_IMPL_GUARD
27
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h.
28
+ #endif
29
+
30
+ #if defined(NVTX_AS_SYSTEM_HEADER)
31
+ #if defined(__clang__)
32
+ #pragma clang system_header
33
+ #elif defined(__GNUC__) || defined(__NVCOMPILER)
34
+ #pragma GCC system_header
35
+ #elif defined(_MSC_VER)
36
+ #pragma system_header
37
+ #endif
38
+ #endif
39
+
40
+ /* ------ Dependency-free types binary-compatible with real types ------- */
41
+
42
+ /* In order to avoid having the NVTX core API headers depend on non-NVTX
43
+ * headers like cuda.h, NVTX defines binary-compatible types to use for
44
+ * safely making the initialization versions of all NVTX functions without
45
+ * needing to have definitions for the real types. */
46
+
47
+ typedef int nvtx_CUdevice;
48
+ typedef void* nvtx_CUcontext;
49
+ typedef void* nvtx_CUstream;
50
+ typedef void* nvtx_CUevent;
51
+
52
+ typedef void* nvtx_cudaStream_t;
53
+ typedef void* nvtx_cudaEvent_t;
54
+
55
+ typedef void* nvtx_cl_platform_id;
56
+ typedef void* nvtx_cl_device_id;
57
+ typedef void* nvtx_cl_context;
58
+ typedef void* nvtx_cl_command_queue;
59
+ typedef void* nvtx_cl_mem;
60
+ typedef void* nvtx_cl_program;
61
+ typedef void* nvtx_cl_kernel;
62
+ typedef void* nvtx_cl_event;
63
+ typedef void* nvtx_cl_sampler;
64
+
65
+ typedef void* nvtx_nvtxSyncUser_t;
66
+ typedef void nvtx_nvtxSyncUserAttributes_t;
67
+
68
+ /* --------- Types for function pointers (with fake API types) ---------- */
69
+
70
+ typedef void (NVTX_API * nvtxMarkEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
71
+ typedef void (NVTX_API * nvtxMarkA_impl_fntype)(const char* message);
72
+ typedef void (NVTX_API * nvtxMarkW_impl_fntype)(const wchar_t* message);
73
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
74
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartA_impl_fntype)(const char* message);
75
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartW_impl_fntype)(const wchar_t* message);
76
+ typedef void (NVTX_API * nvtxRangeEnd_impl_fntype)(nvtxRangeId_t id);
77
+ typedef int (NVTX_API * nvtxRangePushEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
78
+ typedef int (NVTX_API * nvtxRangePushA_impl_fntype)(const char* message);
79
+ typedef int (NVTX_API * nvtxRangePushW_impl_fntype)(const wchar_t* message);
80
+ typedef int (NVTX_API * nvtxRangePop_impl_fntype)(void);
81
+ typedef void (NVTX_API * nvtxNameCategoryA_impl_fntype)(uint32_t category, const char* name);
82
+ typedef void (NVTX_API * nvtxNameCategoryW_impl_fntype)(uint32_t category, const wchar_t* name);
83
+ typedef void (NVTX_API * nvtxNameOsThreadA_impl_fntype)(uint32_t threadId, const char* name);
84
+ typedef void (NVTX_API * nvtxNameOsThreadW_impl_fntype)(uint32_t threadId, const wchar_t* name);
85
+
86
+ /* Real impl types are defined in nvtxImplCuda_v3.h, where CUDA headers are included */
87
+ typedef void (NVTX_API * nvtxNameCuDeviceA_fakeimpl_fntype)(nvtx_CUdevice device, const char* name);
88
+ typedef void (NVTX_API * nvtxNameCuDeviceW_fakeimpl_fntype)(nvtx_CUdevice device, const wchar_t* name);
89
+ typedef void (NVTX_API * nvtxNameCuContextA_fakeimpl_fntype)(nvtx_CUcontext context, const char* name);
90
+ typedef void (NVTX_API * nvtxNameCuContextW_fakeimpl_fntype)(nvtx_CUcontext context, const wchar_t* name);
91
+ typedef void (NVTX_API * nvtxNameCuStreamA_fakeimpl_fntype)(nvtx_CUstream stream, const char* name);
92
+ typedef void (NVTX_API * nvtxNameCuStreamW_fakeimpl_fntype)(nvtx_CUstream stream, const wchar_t* name);
93
+ typedef void (NVTX_API * nvtxNameCuEventA_fakeimpl_fntype)(nvtx_CUevent event, const char* name);
94
+ typedef void (NVTX_API * nvtxNameCuEventW_fakeimpl_fntype)(nvtx_CUevent event, const wchar_t* name);
95
+
96
+ /* Real impl types are defined in nvtxImplOpenCL_v3.h, where OPENCL headers are included */
97
+ typedef void (NVTX_API * nvtxNameClDeviceA_fakeimpl_fntype)(nvtx_cl_device_id device, const char* name);
98
+ typedef void (NVTX_API * nvtxNameClDeviceW_fakeimpl_fntype)(nvtx_cl_device_id device, const wchar_t* name);
99
+ typedef void (NVTX_API * nvtxNameClContextA_fakeimpl_fntype)(nvtx_cl_context context, const char* name);
100
+ typedef void (NVTX_API * nvtxNameClContextW_fakeimpl_fntype)(nvtx_cl_context context, const wchar_t* name);
101
+ typedef void (NVTX_API * nvtxNameClCommandQueueA_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const char* name);
102
+ typedef void (NVTX_API * nvtxNameClCommandQueueW_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const wchar_t* name);
103
+ typedef void (NVTX_API * nvtxNameClMemObjectA_fakeimpl_fntype)(nvtx_cl_mem memobj, const char* name);
104
+ typedef void (NVTX_API * nvtxNameClMemObjectW_fakeimpl_fntype)(nvtx_cl_mem memobj, const wchar_t* name);
105
+ typedef void (NVTX_API * nvtxNameClSamplerA_fakeimpl_fntype)(nvtx_cl_sampler sampler, const char* name);
106
+ typedef void (NVTX_API * nvtxNameClSamplerW_fakeimpl_fntype)(nvtx_cl_sampler sampler, const wchar_t* name);
107
+ typedef void (NVTX_API * nvtxNameClProgramA_fakeimpl_fntype)(nvtx_cl_program program, const char* name);
108
+ typedef void (NVTX_API * nvtxNameClProgramW_fakeimpl_fntype)(nvtx_cl_program program, const wchar_t* name);
109
+ typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, const char* name);
110
+ typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name);
111
+
112
+ /* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */
113
+ typedef void (NVTX_API * nvtxNameCudaDeviceA_fakeimpl_fntype)(int device, const char* name);
114
+ typedef void (NVTX_API * nvtxNameCudaDeviceW_fakeimpl_fntype)(int device, const wchar_t* name);
115
+ typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name);
116
+ typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name);
117
+ typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name);
118
+ typedef void (NVTX_API * nvtxNameCudaEventW_fakeimpl_fntype)(nvtx_cudaEvent_t event, const wchar_t* name);
119
+
120
+ typedef void (NVTX_API * nvtxDomainMarkEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
121
+ typedef nvtxRangeId_t (NVTX_API * nvtxDomainRangeStartEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
122
+ typedef void (NVTX_API * nvtxDomainRangeEnd_impl_fntype)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
123
+ typedef int (NVTX_API * nvtxDomainRangePushEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
124
+ typedef int (NVTX_API * nvtxDomainRangePop_impl_fntype)(nvtxDomainHandle_t domain);
125
+ typedef nvtxResourceHandle_t (NVTX_API * nvtxDomainResourceCreate_impl_fntype)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
126
+ typedef void (NVTX_API * nvtxDomainResourceDestroy_impl_fntype)(nvtxResourceHandle_t resource);
127
+ typedef void (NVTX_API * nvtxDomainNameCategoryA_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
128
+ typedef void (NVTX_API * nvtxDomainNameCategoryW_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
129
+ typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringA_impl_fntype)(nvtxDomainHandle_t domain, const char* string);
130
+ typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringW_impl_fntype)(nvtxDomainHandle_t domain, const wchar_t* string);
131
+ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateA_impl_fntype)(const char* message);
132
+ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wchar_t* message);
133
+ typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain);
134
+ typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved);
135
+
136
+ typedef nvtx_nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_fakeimpl_fntype)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs);
137
+ typedef void (NVTX_API * nvtxDomainSyncUserDestroy_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
138
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
139
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
140
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
141
+ typedef void (NVTX_API * nvtxDomainSyncUserReleasing_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
142
+
143
+ /* ---------------- Types for callback subscription --------------------- */
144
+
145
+ typedef const void *(NVTX_API * NvtxGetExportTableFunc_t)(uint32_t exportTableId);
146
+ typedef int (NVTX_API * NvtxInitializeInjectionNvtxFunc_t)(NvtxGetExportTableFunc_t exportTable);
147
+
148
+ typedef enum NvtxCallbackModule
149
+ {
150
+ NVTX_CB_MODULE_INVALID = 0,
151
+ NVTX_CB_MODULE_CORE = 1,
152
+ NVTX_CB_MODULE_CUDA = 2,
153
+ NVTX_CB_MODULE_OPENCL = 3,
154
+ NVTX_CB_MODULE_CUDART = 4,
155
+ NVTX_CB_MODULE_CORE2 = 5,
156
+ NVTX_CB_MODULE_SYNC = 6,
157
+ /* --- New constants must only be added directly above this line --- */
158
+ NVTX_CB_MODULE_SIZE,
159
+ NVTX_CB_MODULE_FORCE_INT = 0x7fffffff
160
+ } NvtxCallbackModule;
161
+
162
+ typedef enum NvtxCallbackIdCore
163
+ {
164
+ NVTX_CBID_CORE_INVALID = 0,
165
+ NVTX_CBID_CORE_MarkEx = 1,
166
+ NVTX_CBID_CORE_MarkA = 2,
167
+ NVTX_CBID_CORE_MarkW = 3,
168
+ NVTX_CBID_CORE_RangeStartEx = 4,
169
+ NVTX_CBID_CORE_RangeStartA = 5,
170
+ NVTX_CBID_CORE_RangeStartW = 6,
171
+ NVTX_CBID_CORE_RangeEnd = 7,
172
+ NVTX_CBID_CORE_RangePushEx = 8,
173
+ NVTX_CBID_CORE_RangePushA = 9,
174
+ NVTX_CBID_CORE_RangePushW = 10,
175
+ NVTX_CBID_CORE_RangePop = 11,
176
+ NVTX_CBID_CORE_NameCategoryA = 12,
177
+ NVTX_CBID_CORE_NameCategoryW = 13,
178
+ NVTX_CBID_CORE_NameOsThreadA = 14,
179
+ NVTX_CBID_CORE_NameOsThreadW = 15,
180
+ /* --- New constants must only be added directly above this line --- */
181
+ NVTX_CBID_CORE_SIZE,
182
+ NVTX_CBID_CORE_FORCE_INT = 0x7fffffff
183
+ } NvtxCallbackIdCore;
184
+
185
+ typedef enum NvtxCallbackIdCore2
186
+ {
187
+ NVTX_CBID_CORE2_INVALID = 0,
188
+ NVTX_CBID_CORE2_DomainMarkEx = 1,
189
+ NVTX_CBID_CORE2_DomainRangeStartEx = 2,
190
+ NVTX_CBID_CORE2_DomainRangeEnd = 3,
191
+ NVTX_CBID_CORE2_DomainRangePushEx = 4,
192
+ NVTX_CBID_CORE2_DomainRangePop = 5,
193
+ NVTX_CBID_CORE2_DomainResourceCreate = 6,
194
+ NVTX_CBID_CORE2_DomainResourceDestroy = 7,
195
+ NVTX_CBID_CORE2_DomainNameCategoryA = 8,
196
+ NVTX_CBID_CORE2_DomainNameCategoryW = 9,
197
+ NVTX_CBID_CORE2_DomainRegisterStringA = 10,
198
+ NVTX_CBID_CORE2_DomainRegisterStringW = 11,
199
+ NVTX_CBID_CORE2_DomainCreateA = 12,
200
+ NVTX_CBID_CORE2_DomainCreateW = 13,
201
+ NVTX_CBID_CORE2_DomainDestroy = 14,
202
+ NVTX_CBID_CORE2_Initialize = 15,
203
+ /* --- New constants must only be added directly above this line --- */
204
+ NVTX_CBID_CORE2_SIZE,
205
+ NVTX_CBID_CORE2_FORCE_INT = 0x7fffffff
206
+ } NvtxCallbackIdCore2;
207
+
208
+ typedef enum NvtxCallbackIdCuda
209
+ {
210
+ NVTX_CBID_CUDA_INVALID = 0,
211
+ NVTX_CBID_CUDA_NameCuDeviceA = 1,
212
+ NVTX_CBID_CUDA_NameCuDeviceW = 2,
213
+ NVTX_CBID_CUDA_NameCuContextA = 3,
214
+ NVTX_CBID_CUDA_NameCuContextW = 4,
215
+ NVTX_CBID_CUDA_NameCuStreamA = 5,
216
+ NVTX_CBID_CUDA_NameCuStreamW = 6,
217
+ NVTX_CBID_CUDA_NameCuEventA = 7,
218
+ NVTX_CBID_CUDA_NameCuEventW = 8,
219
+ /* --- New constants must only be added directly above this line --- */
220
+ NVTX_CBID_CUDA_SIZE,
221
+ NVTX_CBID_CUDA_FORCE_INT = 0x7fffffff
222
+ } NvtxCallbackIdCuda;
223
+
224
+ typedef enum NvtxCallbackIdCudaRt
225
+ {
226
+ NVTX_CBID_CUDART_INVALID = 0,
227
+ NVTX_CBID_CUDART_NameCudaDeviceA = 1,
228
+ NVTX_CBID_CUDART_NameCudaDeviceW = 2,
229
+ NVTX_CBID_CUDART_NameCudaStreamA = 3,
230
+ NVTX_CBID_CUDART_NameCudaStreamW = 4,
231
+ NVTX_CBID_CUDART_NameCudaEventA = 5,
232
+ NVTX_CBID_CUDART_NameCudaEventW = 6,
233
+ /* --- New constants must only be added directly above this line --- */
234
+ NVTX_CBID_CUDART_SIZE,
235
+ NVTX_CBID_CUDART_FORCE_INT = 0x7fffffff
236
+ } NvtxCallbackIdCudaRt;
237
+
238
+ typedef enum NvtxCallbackIdOpenCL
239
+ {
240
+ NVTX_CBID_OPENCL_INVALID = 0,
241
+ NVTX_CBID_OPENCL_NameClDeviceA = 1,
242
+ NVTX_CBID_OPENCL_NameClDeviceW = 2,
243
+ NVTX_CBID_OPENCL_NameClContextA = 3,
244
+ NVTX_CBID_OPENCL_NameClContextW = 4,
245
+ NVTX_CBID_OPENCL_NameClCommandQueueA = 5,
246
+ NVTX_CBID_OPENCL_NameClCommandQueueW = 6,
247
+ NVTX_CBID_OPENCL_NameClMemObjectA = 7,
248
+ NVTX_CBID_OPENCL_NameClMemObjectW = 8,
249
+ NVTX_CBID_OPENCL_NameClSamplerA = 9,
250
+ NVTX_CBID_OPENCL_NameClSamplerW = 10,
251
+ NVTX_CBID_OPENCL_NameClProgramA = 11,
252
+ NVTX_CBID_OPENCL_NameClProgramW = 12,
253
+ NVTX_CBID_OPENCL_NameClEventA = 13,
254
+ NVTX_CBID_OPENCL_NameClEventW = 14,
255
+ /* --- New constants must only be added directly above this line --- */
256
+ NVTX_CBID_OPENCL_SIZE,
257
+ NVTX_CBID_OPENCL_FORCE_INT = 0x7fffffff
258
+ } NvtxCallbackIdOpenCL;
259
+
260
+ typedef enum NvtxCallbackIdSync
261
+ {
262
+ NVTX_CBID_SYNC_INVALID = 0,
263
+ NVTX_CBID_SYNC_DomainSyncUserCreate = 1,
264
+ NVTX_CBID_SYNC_DomainSyncUserDestroy = 2,
265
+ NVTX_CBID_SYNC_DomainSyncUserAcquireStart = 3,
266
+ NVTX_CBID_SYNC_DomainSyncUserAcquireFailed = 4,
267
+ NVTX_CBID_SYNC_DomainSyncUserAcquireSuccess = 5,
268
+ NVTX_CBID_SYNC_DomainSyncUserReleasing = 6,
269
+ /* --- New constants must only be added directly above this line --- */
270
+ NVTX_CBID_SYNC_SIZE,
271
+ NVTX_CBID_SYNC_FORCE_INT = 0x7fffffff
272
+ } NvtxCallbackIdSync;
273
+
274
+ /* IDs for NVTX Export Tables */
275
+ typedef enum NvtxExportTableID
276
+ {
277
+ NVTX_ETID_INVALID = 0,
278
+ NVTX_ETID_CALLBACKS = 1,
279
+ NVTX_ETID_RESERVED0 = 2,
280
+ NVTX_ETID_VERSIONINFO = 3,
281
+ /* --- New constants must only be added directly above this line --- */
282
+ NVTX_ETID_SIZE,
283
+ NVTX_ETID_FORCE_INT = 0x7fffffff
284
+ } NvtxExportTableID;
285
+
286
+ typedef void (* NvtxFunctionPointer)(void); /* generic uncallable function pointer, must be cast to appropriate function type */
287
+ typedef NvtxFunctionPointer** NvtxFunctionTable; /* double pointer because array(1) of pointers(2) to function pointers */
288
+
289
+ typedef struct NvtxExportTableCallbacks
290
+ {
291
+ size_t struct_size; /* presumably sizeof(NvtxExportTableCallbacks), by analogy with NvtxExportTableVersionInfo -- TODO confirm */
292
+
293
+ /* Returns an array of pointers to function pointers for the requested callback module (presumably delivered through out_table, with its entry count in out_size -- TODO confirm) */
294
+ int (NVTX_API *GetModuleFunctionTable)(
295
+ NvtxCallbackModule callback_module,
296
+ NvtxFunctionTable* out_table,
297
+ unsigned int* out_size);
298
+ } NvtxExportTableCallbacks;
299
+
300
+ typedef struct NvtxExportTableVersionInfo
301
+ {
302
+ /* sizeof(NvtxExportTableVersionInfo) */
303
+ size_t struct_size;
304
+
305
+ /* The API version comes from the NVTX library linked to the app. The
306
+ * injection library can use this info to make some assumptions */
307
+ uint32_t version;
308
+
309
+ /* Reserved for alignment, do not use */
310
+ uint32_t reserved0;
311
+
312
+ /* This must be set by tools when attaching to provide applications
313
+ * the ability to, in emergency situations, detect problematic tool
314
+ * versions and modify the NVTX source to prevent attaching anything
315
+ * that causes trouble in the app. Currently, this value is ignored. */
316
+ void (NVTX_API *SetInjectionNvtxVersion)(
317
+ uint32_t version);
318
+ } NvtxExportTableVersionInfo;
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libcufile_rdma.so.1 ADDED
Binary file (43.3 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libnvtx3interop.so.1 ADDED
Binary file (40.2 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (205 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__init__.py ADDED
File without changes
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (213 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /**
51
+ * @file cufile.h
52
+ * @brief cuFile C APIs
53
+ *
54
+ * This file contains all the C APIs to perform GPUDirect Storage supported IO operations
55
+ */
56
+
57
+ #ifdef __cplusplus
58
+ extern "C"
59
+ {
60
+ #endif
61
+
62
+ /// @cond DOXYGEN_SKIP_MACRO
63
+ #ifndef __CUFILE_H_
64
+ #define __CUFILE_H_
65
+
66
+ #include <stdlib.h>
67
+ #include <stdbool.h>
68
+
69
+ #include <cuda.h>
70
+ #include <arpa/inet.h>
71
+ #include <sys/socket.h>
72
+
73
+ #define CUFILEOP_BASE_ERR 5000
74
+
75
+ //Note :Data path errors are captured via standard error codes
76
+ #define CUFILEOP_STATUS_ENTRIES \
77
+ CUFILE_OP(0, CU_FILE_SUCCESS, cufile success) \
78
+ CUFILE_OP(CUFILEOP_BASE_ERR + 1, CU_FILE_DRIVER_NOT_INITIALIZED, nvidia-fs driver is not loaded. Set allow_compat_mode to true in cufile.json file to enable compatible mode) \
79
+ CUFILE_OP(CUFILEOP_BASE_ERR + 2, CU_FILE_DRIVER_INVALID_PROPS, invalid property) \
80
+ CUFILE_OP(CUFILEOP_BASE_ERR + 3, CU_FILE_DRIVER_UNSUPPORTED_LIMIT, property range error) \
81
+ CUFILE_OP(CUFILEOP_BASE_ERR + 4, CU_FILE_DRIVER_VERSION_MISMATCH, nvidia-fs driver version mismatch) \
82
+ CUFILE_OP(CUFILEOP_BASE_ERR + 5, CU_FILE_DRIVER_VERSION_READ_ERROR, nvidia-fs driver version read error) \
83
+ CUFILE_OP(CUFILEOP_BASE_ERR + 6, CU_FILE_DRIVER_CLOSING, driver shutdown in progress) \
84
+ CUFILE_OP(CUFILEOP_BASE_ERR + 7, CU_FILE_PLATFORM_NOT_SUPPORTED, GPUDirect Storage not supported on current platform) \
85
+ CUFILE_OP(CUFILEOP_BASE_ERR + 8, CU_FILE_IO_NOT_SUPPORTED, GPUDirect Storage not supported on current file) \
86
+ CUFILE_OP(CUFILEOP_BASE_ERR + 9, CU_FILE_DEVICE_NOT_SUPPORTED, GPUDirect Storage not supported on current GPU) \
87
+ CUFILE_OP(CUFILEOP_BASE_ERR + 10, CU_FILE_NVFS_DRIVER_ERROR, nvidia-fs driver ioctl error) \
88
+ CUFILE_OP(CUFILEOP_BASE_ERR + 11, CU_FILE_CUDA_DRIVER_ERROR, CUDA Driver API error) \
89
+ CUFILE_OP(CUFILEOP_BASE_ERR + 12, CU_FILE_CUDA_POINTER_INVALID, invalid device pointer) \
90
+ CUFILE_OP(CUFILEOP_BASE_ERR + 13, CU_FILE_CUDA_MEMORY_TYPE_INVALID, invalid pointer memory type) \
91
+ CUFILE_OP(CUFILEOP_BASE_ERR + 14, CU_FILE_CUDA_POINTER_RANGE_ERROR, pointer range exceeds allocated address range) \
92
+ CUFILE_OP(CUFILEOP_BASE_ERR + 15, CU_FILE_CUDA_CONTEXT_MISMATCH, cuda context mismatch) \
93
+ CUFILE_OP(CUFILEOP_BASE_ERR + 16, CU_FILE_INVALID_MAPPING_SIZE, access beyond maximum pinned size) \
94
+ CUFILE_OP(CUFILEOP_BASE_ERR + 17, CU_FILE_INVALID_MAPPING_RANGE, access beyond mapped size) \
95
+ CUFILE_OP(CUFILEOP_BASE_ERR + 18, CU_FILE_INVALID_FILE_TYPE, unsupported file type) \
96
+ CUFILE_OP(CUFILEOP_BASE_ERR + 19, CU_FILE_INVALID_FILE_OPEN_FLAG, unsupported file open flags) \
97
+ CUFILE_OP(CUFILEOP_BASE_ERR + 20, CU_FILE_DIO_NOT_SET, fd direct IO not set) \
98
+ CUFILE_OP(CUFILEOP_BASE_ERR + 22, CU_FILE_INVALID_VALUE, invalid arguments) \
99
+ CUFILE_OP(CUFILEOP_BASE_ERR + 23, CU_FILE_MEMORY_ALREADY_REGISTERED, device pointer already registered) \
100
+ CUFILE_OP(CUFILEOP_BASE_ERR + 24, CU_FILE_MEMORY_NOT_REGISTERED, device pointer lookup failure) \
101
+ CUFILE_OP(CUFILEOP_BASE_ERR + 25, CU_FILE_PERMISSION_DENIED, driver or file access error) \
102
+ CUFILE_OP(CUFILEOP_BASE_ERR + 26, CU_FILE_DRIVER_ALREADY_OPEN, driver is already open) \
103
+ CUFILE_OP(CUFILEOP_BASE_ERR + 27, CU_FILE_HANDLE_NOT_REGISTERED, file descriptor is not registered) \
104
+ CUFILE_OP(CUFILEOP_BASE_ERR + 28, CU_FILE_HANDLE_ALREADY_REGISTERED, file descriptor is already registered) \
105
+ CUFILE_OP(CUFILEOP_BASE_ERR + 29, CU_FILE_DEVICE_NOT_FOUND, GPU device not found) \
106
+ CUFILE_OP(CUFILEOP_BASE_ERR + 30, CU_FILE_INTERNAL_ERROR, internal error) \
107
+ CUFILE_OP(CUFILEOP_BASE_ERR + 31, CU_FILE_GETNEWFD_FAILED, failed to obtain new file descriptor) \
108
+ CUFILE_OP(CUFILEOP_BASE_ERR + 33, CU_FILE_NVFS_SETUP_ERROR, NVFS driver initialization error) \
109
+ CUFILE_OP(CUFILEOP_BASE_ERR + 34, CU_FILE_IO_DISABLED, GPUDirect Storage disabled by config on current file)\
110
+ CUFILE_OP(CUFILEOP_BASE_ERR + 35, CU_FILE_BATCH_SUBMIT_FAILED, failed to submit batch operation)\
111
+ CUFILE_OP(CUFILEOP_BASE_ERR + 36, CU_FILE_GPU_MEMORY_PINNING_FAILED, failed to allocate pinned GPU Memory) \
112
+ CUFILE_OP(CUFILEOP_BASE_ERR + 37, CU_FILE_BATCH_FULL, queue full for batch operation) \
113
+ CUFILE_OP(CUFILEOP_BASE_ERR + 38, CU_FILE_ASYNC_NOT_SUPPORTED, cuFile stream operation not supported) \
114
+ CUFILE_OP(CUFILEOP_BASE_ERR + 39, CU_FILE_IO_MAX_ERROR, GPUDirect Storage Max Error)
115
+
116
+
117
+ /**
118
+ * @brief cufileop status enum
119
+ *
120
+ * @note on success the error code is set to @ref CU_FILE_SUCCESS.
121
+ * @note The error code can be inspected using @ref IS_CUFILE_ERR and @ref CUFILE_ERRSTR.
122
+ * @note The error code if set to @ref CU_FILE_CUDA_DRIVER_ERROR, then cuda error can be inspected using @ref IS_CUDA_ERR and @ref CU_FILE_CUDA_ERR.
123
+ * @note Data path errors are captured via standard error codes
124
+ */
125
+ typedef enum CUfileOpError {
126
+ /// @cond DOXYGEN_SKIP_MACRO
127
+ #define CUFILE_OP(code, name, string) name = code,
128
+ CUFILEOP_STATUS_ENTRIES
129
+ #undef CUFILE_OP
130
+ ///@endcond
131
+ } CUfileOpError;
132
+
133
+ /// @endcond
134
+
135
+ /**
136
+ * @brief cufileop status string
137
+ */
138
+ static inline const char *cufileop_status_error(CUfileOpError status)
139
+ {
140
+ switch (status) {
141
+ /// @cond DOXYGEN_SKIP_MACRO
142
+ #define CUFILE_OP(code, name, string) \
143
+ case name: return #string;
144
+ CUFILEOP_STATUS_ENTRIES
145
+ #undef CUFILE_OP
146
+ ///@endcond
147
+ default:return "unknown cufile error";
148
+ }
149
+ }
150
+
151
+ /**
152
+ * @brief cufile error status: cuFile error code plus CUDA driver error code
153
+ */
154
+ typedef struct CUfileError {
155
+
156
+ CUfileOpError err; // cufile error
157
+
158
+ CUresult cu_err; // cuda driver error
159
+
160
+ }CUfileError_t;
161
+
162
+ /**
163
+ * @brief error macros to inspect error status of type @ref CUfileOpError
164
+ */
165
+
166
+ #define IS_CUFILE_ERR(err) \
167
+ (abs((err)) > CUFILEOP_BASE_ERR)
168
+
169
+ #define CUFILE_ERRSTR(err) \
170
+ cufileop_status_error((CUfileOpError)abs((err)))
171
+
172
+ #define IS_CUDA_ERR(status) \
173
+ ((status).err == CU_FILE_CUDA_DRIVER_ERROR)
174
+
175
+ #define CU_FILE_CUDA_ERR(status) ((status).cu_err)
176
+
177
+ /* driver properties */
178
+ typedef enum CUfileDriverStatusFlags {
179
+ CU_FILE_LUSTRE_SUPPORTED = 0, /*!< Support for DDN LUSTRE */
180
+
181
+ CU_FILE_WEKAFS_SUPPORTED = 1, /*!< Support for WEKAFS */
182
+
183
+ CU_FILE_NFS_SUPPORTED = 2, /*!< Support for NFS */
184
+
185
+ CU_FILE_GPFS_SUPPORTED = 3, /*!< Support for GPFS */
186
+
187
+ CU_FILE_NVME_SUPPORTED = 4, /*!< Support for NVMe */
188
+
189
+ CU_FILE_NVMEOF_SUPPORTED = 5, /*!< Support for NVMeOF */
190
+
191
+ CU_FILE_SCSI_SUPPORTED = 6, /*!< Support for SCSI */
192
+
193
+ CU_FILE_SCALEFLUX_CSD_SUPPORTED = 7, /*!< Support for Scaleflux CSD*/
194
+
195
+ CU_FILE_NVMESH_SUPPORTED = 8, /*!< Support for NVMesh Block Dev*/
196
+ CU_FILE_BEEGFS_SUPPORTED = 9, /*!< Support for BeeGFS */
197
+ //10 is reserved for YRCloudFile
198
+ CU_FILE_NVME_P2P_SUPPORTED = 11, /*!< Support for NVMe using PCI P2PDMA */
199
+
200
+ }CUfileDriverStatusFlags_t;
201
+
202
+ typedef enum CUfileDriverControlFlags {
203
+ CU_FILE_USE_POLL_MODE = 0 , /*!< use POLL mode. properties.use_poll_mode*/
204
+
205
+ CU_FILE_ALLOW_COMPAT_MODE = 1/*!< allow COMPATIBILITY mode. properties.allow_compat_mode*/
206
+
207
+ }CUfileDriverControlFlags_t;
208
+
209
+ typedef enum CUfileFeatureFlags {
210
+ CU_FILE_DYN_ROUTING_SUPPORTED = 0, /*!< Support for Dynamic routing to handle devices across the PCIe bridges */
211
+
212
+ CU_FILE_BATCH_IO_SUPPORTED = 1, /*!< Unsupported */
213
+
214
+ CU_FILE_STREAMS_SUPPORTED = 2, /*!< Unsupported */
215
+
216
+ CU_FILE_PARALLEL_IO_SUPPORTED = 3 /*!< Unsupported */
217
+ }CUfileFeatureFlags_t;
218
+
219
+ typedef struct CUfileDrvProps {
220
+ struct {
221
+ unsigned int major_version;
222
+
223
+ unsigned int minor_version;
224
+
225
+ size_t poll_thresh_size;
226
+
227
+ size_t max_direct_io_size;
228
+
229
+ unsigned int dstatusflags;
230
+
231
+ unsigned int dcontrolflags;
232
+
233
+ } nvfs;
234
+
235
+ unsigned int fflags;
236
+
237
+ unsigned int max_device_cache_size;
238
+
239
+ unsigned int per_buffer_cache_size;
240
+
241
+ unsigned int max_device_pinned_mem_size;
242
+
243
+ unsigned int max_batch_io_size;
244
+ unsigned int max_batch_io_timeout_msecs;
245
+ }CUfileDrvProps_t;
246
+
247
+ typedef struct sockaddr sockaddr_t;
248
+
249
+ typedef struct cufileRDMAInfo
250
+ {
251
+ int version;
252
+ int desc_len;
253
+ const char *desc_str;
254
+ }cufileRDMAInfo_t;
255
+
256
+ #define CU_FILE_RDMA_REGISTER 1
257
+ #define CU_FILE_RDMA_RELAXED_ORDERING (1<<1)
258
+
259
+
260
+
261
+ typedef struct CUfileFSOps {
262
+ /* NULL means discover using fstat */
263
+ const char* (*fs_type) (void *handle);
264
+
265
+ /* list of host addresses to use, NULL means no restriction */
266
+ int (*getRDMADeviceList)(void *handle, sockaddr_t **hostaddrs);
267
+
268
+ /* -1 no pref */
269
+ int (*getRDMADevicePriority)(void *handle, char*, size_t,
270
+ loff_t, sockaddr_t* hostaddr);
271
+
272
+ /* NULL means try VFS */
273
+ ssize_t (*read) (void *handle, char*, size_t, loff_t, cufileRDMAInfo_t*);
274
+ ssize_t (*write) (void *handle, const char *, size_t, loff_t , cufileRDMAInfo_t*);
275
+ }CUfileFSOps_t;
276
+
277
+ /* File Handle */
278
+ enum CUfileFileHandleType {
279
+ CU_FILE_HANDLE_TYPE_OPAQUE_FD = 1, /*!< Linux based fd */
280
+
281
+ CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 = 2, /*!< Windows based handle (unsupported) */
282
+
283
+ CU_FILE_HANDLE_TYPE_USERSPACE_FS = 3, /* Userspace based FS */
284
+ };
285
+
286
+ typedef struct CUfileDescr_t {
287
+ enum CUfileFileHandleType type; /* type of file being registered */
288
+ union {
289
+ int fd; /* Linux */
290
+ void *handle; /* Windows */
291
+ } handle;
292
+ const CUfileFSOps_t *fs_ops; /* file system operation table */
293
+ }CUfileDescr_t;
294
+
295
+ /**
296
+ * @brief File handle type
297
+ *
298
+ */
299
+ typedef void* CUfileHandle_t;
300
+
301
+
302
+ #pragma GCC visibility push(default)
303
+
304
+ /**
305
+ * @brief cuFileHandleRegister is required, and performs extra checking that is memoized to provide increased performance on later cuFile operations.
306
+ *
307
+ * @param fh @ref CUfileHandle_t opaque file handle for IO operations
308
+ * @param descr @ref CUfileDescr_t file descriptor (OS agnostic)
309
+ *
310
+ * @return CU_FILE_SUCCESS on successful completion. fh will be updated for use in @ref cuFileRead, @ref cuFileWrite, @ref cuFileHandleDeregister
311
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED on failure to load driver
312
+ * @return CU_FILE_IO_NOT_SUPPORTED - if filesystem is not supported
313
+ * @return CU_FILE_INVALID_VALUE if null or bad api arguments
314
+ * @return CU_FILE_INVALID_FILE_OPEN_FLAG if file is opened with unsupported modes like no O_DIRECT
315
+ * @return CU_FILE_INVALID_FILE_TYPE if filepath is not valid or is not a regular file
316
+ * @return CU_FILE_HANDLE_ALREADY_REGISTERED if file handle/descriptor is already registered
317
+ *
318
+ * <b>Description</b>
319
+ * cuFileHandleRegister registers the open file descriptor for use with cuFile IO operations.
320
+ *
321
+ * This API will ensure that the file’s descriptor is checked for GPUDirect Storage support and returns a valid file handle on CU_FILE_SUCCESS.
322
+ *
323
+ * @note the file needs to be opened in O_DIRECT mode to support GPUDirect Storage.
324
+ *
325
+ * @see cuFileRead
326
+ * @see cuFileWrite
327
+ * @see cuFileHandleDeregister
328
+ *
329
+ */
330
+ CUfileError_t cuFileHandleRegister(CUfileHandle_t *fh, CUfileDescr_t *descr);
331
+
332
+ /**
333
+ * @brief releases a registered filehandle from cuFile
334
+ *
335
+ * @param fh @ref CUfileHandle_t file handle
336
+ *
337
+ * @return void
338
+ *
339
+ * @see cuFileHandleRegister
340
+ */
341
+ void cuFileHandleDeregister(CUfileHandle_t fh);
342
+
343
+ /**
344
+ * @brief register an existing cudaMalloced memory with cuFile to pin for GPUDirect Storage access or
345
+ * register host allocated memory with cuFile.
346
+ *
347
+ * @param bufPtr_base buffer pointer allocated
348
+ * @param length size of memory region from the above specified bufPtr
349
+ * @param flags CU_FILE_RDMA_REGISTER
350
+ *
351
+ * @return CU_FILE_SUCCESS on success
352
+ * @return CU_FILE_NVFS_DRIVER_ERROR
353
+ * @return CU_FILE_INVALID_VALUE
354
+ * @return CU_FILE_CUDA_ERROR for unsupported memory type
355
+ * @return CU_FILE_MEMORY_ALREADY_REGISTERED on error
356
+ * @return CU_FILE_GPU_MEMORY_PINNING_FAILED if not enough pinned memory is available
357
+ * @note This memory will be used to perform GPU direct DMA from the supported storage.
358
+ * @warning This API is intended for usecases where the memory is used as streaming buffer that is reused across multiple cuFile IO operations before calling @ref cuFileBufDeregister
359
+ *
360
+ * @see cuFileBufDeregister
361
+ * @see cuFileRead
362
+ * @see cuFileWrite
363
+ */
364
+ CUfileError_t cuFileBufRegister(const void *bufPtr_base, size_t length, int flags);
365
+
366
+ /**
367
+ * @brief deregister an already registered device or host memory from cuFile
368
+ *
369
+ * @param bufPtr_base buffer pointer to deregister
370
+ *
371
+ * @return CU_FILE_SUCCESS on success
372
+ * @return CU_FILE_INVALID_VALUE on invalid memory pointer or unregistered memory pointer
373
+ *
374
+ * @see cuFileBufRegister
375
+ * @see cuFileRead
376
+ * @see cuFileWrite
377
+ */
378
+
379
+ CUfileError_t cuFileBufDeregister(const void *bufPtr_base);
380
+
381
+ /**
382
+ * @brief read data from a registered file handle to a specified device or host memory
383
+ *
384
+ * @param fh @ref CUfileHandle_t opaque file handle
385
+ * @param bufPtr_base base address of buffer in device or host memory
386
+ * @param size size bytes to read
387
+ * @param file_offset file-offset from beginning of the file
388
+ * @param bufPtr_offset offset relative to the bufPtr_base pointer to read into.
389
+ *
390
+ * @return size of bytes successfully read
391
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
392
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
393
+ *
394
+ * @note If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
395
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
396
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
397
+ *
398
+ * @see cuFileBufRegister
399
+ * @see cuFileHandleRegister
400
+ * @see cuFileWrite
401
+ */
402
+
403
+ ssize_t cuFileRead(CUfileHandle_t fh, void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
404
+
405
+ /**
406
+ * @brief write data from a specified device or host memory to a registered file handle
407
+ *
408
+ * @param fh @ref CUfileHandle_t opaque file handle
409
+ * @param bufPtr_base base address of buffer in device or host memory
410
+ * @param size size bytes to write
411
+ * @param file_offset file-offset from beginning of the file
412
+ * @param bufPtr_offset offset relative to the bufPtr_base pointer to write from.
413
+ *
414
+ * @return size of bytes successfully written
415
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
416
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
417
+ *
418
+ * @note If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
419
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
420
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
421
+ *
422
+ * @see cuFileBufRegister
423
+ * @see cuFileHandleRegister
424
+ * @see cuFileRead
425
+ */
426
+
427
+ ssize_t cuFileWrite(CUfileHandle_t fh, const void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
428
+
429
+ // CUFile Driver APIs
430
+
431
+ /**
432
+ * @brief
433
+ * Initialize the cuFile library and open the nvidia-fs driver
434
+ *
435
+ * @return CU_FILE_SUCCESS on success
436
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED
437
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH on driver version mismatch error
438
+ *
439
+ * @see cuFileDriverClose
440
+ */
441
+ CUfileError_t cuFileDriverOpen(void);
442
+
443
+ CUfileError_t cuFileDriverClose(void);
444
+ #define cuFileDriverClose cuFileDriverClose_v2
445
+ /**
446
+ * @brief
447
+ * reset the cuFile library and release the nvidia-fs driver
448
+ *
449
+ * @return CU_FILE_SUCCESS on success
450
+ * @return CU_FILE_DRIVER_CLOSING if there are any active IO operations using @ref cuFileRead or @ref cuFileWrite
451
+ *
452
+ * @see cuFileDriverOpen
453
+ */
454
+ CUfileError_t cuFileDriverClose(void);
455
+
456
+ /**
457
+ * @brief
458
+ * returns use count of cufile drivers at that moment by the process.
459
+ */
460
+ long cuFileUseCount(void);
461
+
462
+ /**
463
+ * @brief
464
+ * Gets the Driver session properties
465
+ *
466
+ * @return CU_FILE_SUCCESS on success
467
+ *
468
+ * @see cuFileDriverSetPollMode
469
+ * @see cuFileDriverSetMaxDirectIOSize
470
+ * @see cuFileDriverSetMaxCacheSize
471
+ * @see cuFileDriverSetMaxPinnedMemSize
472
+ */
473
+ CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t *props);
474
+
475
+ /**
476
+ * @brief
477
+ * Sets whether the Read/Write APIs use polling to do IO operations
478
+ *
479
+ * @param poll boolean to indicate whether to use poll mode or not
480
+ * @param poll_threshold_size max IO size to use for POLLING mode in KB
481
+ *
482
+ * @return CU_FILE_SUCCESS on success
483
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
484
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
485
+ *
486
+ * @warning This is an advanced command and should be tuned based on available system memory
487
+ *
488
+ * @see cuFileDriverGetProperties
489
+ */
490
+ CUfileError_t cuFileDriverSetPollMode(bool poll, size_t poll_threshold_size);
491
+
492
+ /**
493
+ * @brief
494
+ * Control parameter to set max IO size(KB) used by the library to talk to nvidia-fs driver
495
+ *
496
+ * @param max_direct_io_size maximum allowed direct io size in KB
497
+ *
498
+ * @return CU_FILE_SUCCESS on success
499
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
500
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
501
+ *
502
+ * @warning This is an advanced command and should be tuned based on available system memory
503
+ *
504
+ * @see cuFileDriverGetProperties
505
+ *
506
+ */
507
+ CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size);
508
+
509
+ /**
510
+ * @brief
511
+ * Control parameter to set maximum GPU memory reserved per device by the library for internal buffering
512
+ *
513
+ * @param max_cache_size The maximum GPU buffer space per device used for internal use in KB
514
+ *
515
+ * @return CU_FILE_SUCCESS on success
516
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
517
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
518
+ *
519
+ * @warning This is an advanced command and should be tuned based on supported GPU memory
520
+ *
521
+ * @see cuFileDriverGetProperties
522
+ */
523
+ CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size);
524
+
525
+ /**
526
+ * @brief
527
+ * Sets maximum buffer space that is pinned in KB for use by @ref cuFileBufRegister
528
+ *
529
+ * @param max_pinned_size maximum buffer space that is pinned in KB
530
+ *
531
+ * @return CU_FILE_SUCCESS on success
532
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
533
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
534
+ *
535
+ * @warning This is an advanced command and should be tuned based on supported GPU memory
536
+ *
537
+ * @see cuFileDriverGetProperties
538
+ *
539
+ */
540
+ CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size);
541
+
542
+ //Experimental Batch API's
543
+
544
+
545
+ typedef enum CUfileOpcode {
546
+ CUFILE_READ = 0,
547
+ CUFILE_WRITE
548
+ }CUfileOpcode_t;
549
+
550
+ typedef enum CUFILEStatus_enum {
551
+ CUFILE_WAITING = 0x000001, /* required value prior to submission */
552
+ CUFILE_PENDING = 0x000002, /* once enqueued */
553
+ CUFILE_INVALID = 0x000004, /* request was ill-formed or could not be enqueued */
554
+ CUFILE_CANCELED = 0x000008, /* request successfully canceled */
555
+ CUFILE_COMPLETE = 0x0000010, /* request successfully completed */
556
+ CUFILE_TIMEOUT = 0x0000020, /* request timed out */
557
+ CUFILE_FAILED = 0x0000040 /* unable to complete */
558
+ }CUfileStatus_t;
559
+ typedef enum cufileBatchMode {
560
+ CUFILE_BATCH = 1,
561
+ } CUfileBatchMode_t;
562
+ typedef struct CUfileIOParams {
563
+ CUfileBatchMode_t mode; // Must be the very first field.
564
+ union {
565
+ struct {
566
+ void *devPtr_base; //This can be a device memory or a host memory pointer.
567
+ off_t file_offset;
568
+ off_t devPtr_offset;
569
+ size_t size;
570
+ }batch;
571
+ }u;
572
+ CUfileHandle_t fh;
573
+ CUfileOpcode_t opcode;
574
+ void *cookie;
575
+ }CUfileIOParams_t;
576
+ typedef struct CUfileIOEvents {
577
+ void *cookie;
578
+ CUfileStatus_t status; /* status of the operation */
579
+ size_t ret; /* -ve error or amount of I/O done. */
580
+ }CUfileIOEvents_t;
581
+
582
+ typedef void* CUfileBatchHandle_t;
583
+
584
+ CUfileError_t cuFileBatchIOSetUp(CUfileBatchHandle_t *batch_idp, unsigned nr);
585
+ CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t *iocbp, unsigned int flags);
586
+ CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr,
587
+ CUfileIOEvents_t *iocbp, struct timespec* timeout);
588
+ CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp);
589
+ void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp);
590
+
591
+ //Async API's with cuda streams
592
+
593
+ // cuFile stream API registration flags
594
+ // buffer pointer offset is set at submission time
595
+ #define CU_FILE_STREAM_FIXED_BUF_OFFSET 1
596
+ // file offset is set at submission time
597
+ #define CU_FILE_STREAM_FIXED_FILE_OFFSET 2
598
+ // file size is set at submission time
599
+ #define CU_FILE_STREAM_FIXED_FILE_SIZE 4
600
+ // size, offset and buffer offset are 4k aligned
601
+ #define CU_FILE_STREAM_PAGE_ALIGNED_INPUTS 8
602
+
603
+ /**
604
+ *@brief
605
+
606
+ * @param fh The cuFile handle for the file.
607
+ * @param bufPtr_base base address of buffer in device or host memory
608
+ * @param size_p pointer to size bytes to read
609
+ * @note *size_p if the size is not known at the time of submission, then must provide the max possible size for I/O request.
610
+ * @param file_offset_p pointer to file-offset from beginning of the file
611
+ * @param bufPtr_offset_p pointer to offset relative to the bufPtr_base pointer to read into.
612
+ * @param bytes_read_p pointer to the number of bytes that were successfully read.
613
+ * @param stream cuda stream for the operation.
614
+ *
615
+ * @return size of bytes successfully read in *bytes_read_p
616
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
617
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
618
+ *
619
+ * @note If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
620
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
621
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
622
+ * @note If the stream is registered with cuFileStreamRegister, the IO setup and teardown overhead will be reduced.
623
+ * @note on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
624
+ *
625
+ *
626
+ * @see cuFileBufRegister
627
+ * @see cuFileHandleRegister
628
+ * @see cuFileRead
629
+ * @see cuFileStreamRegister
630
+ * @see cuFileStreamDeregister
631
+ */
632
+
633
+ CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void *bufPtr_base,
634
+ size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_read_p, CUstream stream);
635
+
636
+ /**
637
+ *@brief
638
+
639
+ * @param fh The cuFile handle for the file.
640
+ * @param bufPtr_base base address of buffer in device or host memory
641
+ * @param size_p pointer to size bytes to write.
642
+ * @note *size_p if the size is not known at the time of submission, then must provide the max possible size for I/O request.
643
+ * @param file_offset_p pointer to file-offset from beginning of the file
644
+ * @param bufPtr_offset_p pointer to offset relative to the bufPtr_base pointer to write from.
645
+ * @param bytes_written_p pointer to the number of bytes that were successfully written.
646
+ * @param stream cuda stream for the operation.
647
+ *
648
+ * @return size of bytes successfully written in *bytes_written_p
649
+ * @return -1 on error, in which case errno is set to indicate filesystem errors.
650
+ * @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
651
+ *
652
+ * @note If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
653
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
654
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
655
+ * @note If the stream is registered with cuFileStreamRegister prior to this call, the IO setup and teardown overhead will be reduced.
656
+ * @note on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
657
+ *
658
+ * @see cuFileBufRegister
659
+ * @see cuFileHandleRegister
660
+ * @see cuFileWrite
661
+ * @see cuFileStreamRegister
662
+ * @see cuFileStreamDeregister
663
+ */
664
+
665
+ CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void *bufPtr_base,
666
+ size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_written_p, CUstream stream);
667
+
668
+ /**
669
+ *@brief
670
+
671
+ * @param stream cuda stream for the operation.
672
+ * @param flags for the stream to improve the stream execution of IO based on input parameters.
673
+ * @note supported FLAGS are
674
+ * @note CU_FILE_STREAM_FIXED_BUF_OFFSET - buffer pointer offset is set at submission time
675
+ * @note CU_FILE_STREAM_FIXED_FILE_OFFSET - file offset is set at submission time
676
+ * @note CU_FILE_STREAM_FIXED_FILE_SIZE - file size is set at submission time
677
+ * @note CU_FILE_STREAM_PAGE_ALIGNED_INPUTS - size, offset and buffer offset are 4k aligned
678
+ *
679
+ * @note allocates resources needed to support cuFile operations asynchronously for the cuda stream
680
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
681
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
682
+ *
683
+ * @return CU_FILE_SUCCESS on success
684
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
685
+ * @return CU_FILE_INVALID_VALUE if the stream is invalid
686
+ *
687
+ * @see cuFileReadAsync
688
+ * @see cuFileWriteAsync
689
+ * @see cuFileStreamDeregister
690
+ */
691
+
692
+ CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags);
693
+
694
+ /**
695
+ *@brief
696
+
697
+ * @param stream cuda stream for the operation.
698
+ *
699
+ * @note deallocates resources used by previous cuFile asynchronous operations for the cuda stream
700
+ * @note it is highly recommended to call this after cuda stream errors to release any outstanding cuFile resources for this stream
701
+ * @note must be called before cuStreamDestroy call for the specified stream.
702
+ * @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
703
+ * for cases where the BAR1 memory size is smaller than the size of the allocated memory.
704
+ *
705
+ * @return CU_FILE_SUCCESS on success
706
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
707
+ * @return CU_FILE_INVALID_VALUE if the stream is invalid
708
+ *
709
+ * @see cuFileReadAsync
710
+ * @see cuFileWriteAsync
711
+ * @see cuFileStreamRegister
712
+ */
713
+
714
+ CUfileError_t cuFileStreamDeregister(CUstream stream);
715
+
716
+ /**
717
+ *@brief
718
+
719
+ * @returns cufile library version.
720
+ *
721
+ * @note The version is returned as (1000 * major + 10 * minor).
722
+ * @note For example, CUFILE 1.7.0 would be represented by 1070.
723
+ * @note This is useful for applications that need to inquire the library.
724
+ *
725
+ * @return CU_FILE_SUCCESS on success
726
+ * @return CU_FILE_INVALID_VALUE if the input parameter is null.
727
+ * @return CU_FILE_DRIVER_VERSION_READ_ERROR if the version is not available.
728
+ *
729
+ */
730
+
731
+ CUfileError_t cuFileGetVersion(int *version);
732
+
733
+ #pragma GCC visibility pop
734
+
735
+ /// @cond DOXYGEN_SKIP_MACRO
736
+ #endif // CUFILE_H
737
+ /// @endcond
738
+ #ifdef __cplusplus
739
+ }
740
+ #endif
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__init__.py ADDED
File without changes
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (209 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/libcufile_rdma.so.1 ADDED
Binary file (46.5 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (205 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__init__.py ADDED
File without changes
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (213 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/curand.h ADDED
@@ -0,0 +1,1080 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /* Copyright 2010-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * The source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * The Licensed Deliverables contained herein are PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and are being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CURAND_H_)
51
+ #define CURAND_H_
52
+
53
+ /**
54
+ * \defgroup HOST Host API
55
+ *
56
+ * @{
57
+ */
58
+ #ifndef __CUDACC_RTC__
59
+ #include <cuda_runtime.h>
60
+ #endif
61
+
62
+ #ifndef CURANDAPI
63
+ #ifdef _WIN32
64
+ #define CURANDAPI __stdcall
65
+ #else
66
+ #define CURANDAPI
67
+ #endif
68
+ #endif
69
+
70
+ #if defined(__cplusplus)
71
+ extern "C" {
72
+ #endif /* __cplusplus */
73
+
74
+ #define CURAND_VER_MAJOR 10
75
+ #define CURAND_VER_MINOR 3
76
+ #define CURAND_VER_PATCH 9
77
+ #define CURAND_VER_BUILD 90
78
+ #define CURAND_VERSION (CURAND_VER_MAJOR * 1000 + \
79
+ CURAND_VER_MINOR * 100 + \
80
+ CURAND_VER_PATCH)
81
+ /* CURAND Host API datatypes */
82
+
83
+ /**
84
+ * @{
85
+ */
86
+
87
+ /**
88
+ * CURAND function call status types
89
+ */
90
+ enum curandStatus {
91
+ CURAND_STATUS_SUCCESS = 0, ///< No errors
92
+ CURAND_STATUS_VERSION_MISMATCH = 100, ///< Header file and linked library version do not match
93
+ CURAND_STATUS_NOT_INITIALIZED = 101, ///< Generator not initialized
94
+ CURAND_STATUS_ALLOCATION_FAILED = 102, ///< Memory allocation failed
95
+ CURAND_STATUS_TYPE_ERROR = 103, ///< Generator is wrong type
96
+ CURAND_STATUS_OUT_OF_RANGE = 104, ///< Argument out of range
97
+ CURAND_STATUS_LENGTH_NOT_MULTIPLE = 105, ///< Length requested is not a multiple of dimension
98
+ CURAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106, ///< GPU does not have double precision required by MRG32k3a
99
+ CURAND_STATUS_LAUNCH_FAILURE = 201, ///< Kernel launch failure
100
+ CURAND_STATUS_PREEXISTING_FAILURE = 202, ///< Preexisting failure on library entry
101
+ CURAND_STATUS_INITIALIZATION_FAILED = 203, ///< Initialization of CUDA failed
102
+ CURAND_STATUS_ARCH_MISMATCH = 204, ///< Architecture mismatch, GPU does not support requested feature
103
+ CURAND_STATUS_INTERNAL_ERROR = 999 ///< Internal library error
104
+ };
105
+
106
+ /*
107
+ * CURAND function call status types
108
+ */
109
+ /** \cond UNHIDE_TYPEDEFS */
110
+ typedef enum curandStatus curandStatus_t;
111
+ /** \endcond */
112
+
113
+ /**
114
+ * CURAND generator types
115
+ */
116
+ enum curandRngType {
117
+ CURAND_RNG_TEST = 0,
118
+ CURAND_RNG_PSEUDO_DEFAULT = 100, ///< Default pseudorandom generator
119
+ CURAND_RNG_PSEUDO_XORWOW = 101, ///< XORWOW pseudorandom generator
120
+ CURAND_RNG_PSEUDO_MRG32K3A = 121, ///< MRG32k3a pseudorandom generator
121
+ CURAND_RNG_PSEUDO_MTGP32 = 141, ///< Mersenne Twister MTGP32 pseudorandom generator
122
+ CURAND_RNG_PSEUDO_MT19937 = 142, ///< Mersenne Twister MT19937 pseudorandom generator
123
+ CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161, ///< PHILOX-4x32-10 pseudorandom generator
124
+ CURAND_RNG_QUASI_DEFAULT = 200, ///< Default quasirandom generator
125
+ CURAND_RNG_QUASI_SOBOL32 = 201, ///< Sobol32 quasirandom generator
126
+ CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202, ///< Scrambled Sobol32 quasirandom generator
127
+ CURAND_RNG_QUASI_SOBOL64 = 203, ///< Sobol64 quasirandom generator
128
+ CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204 ///< Scrambled Sobol64 quasirandom generator
129
+ };
130
+
131
+ /*
132
+ * CURAND generator types
133
+ */
134
+ /** \cond UNHIDE_TYPEDEFS */
135
+ typedef enum curandRngType curandRngType_t;
136
+ /** \endcond */
137
+
138
+ /**
139
+ * CURAND ordering of results in memory
140
+ */
141
+ enum curandOrdering {
142
+ CURAND_ORDERING_PSEUDO_BEST = 100, ///< Best ordering for pseudorandom results
143
+ CURAND_ORDERING_PSEUDO_DEFAULT = 101, ///< Specific default thread sequence for pseudorandom results, same as CURAND_ORDERING_PSEUDO_BEST
144
+ CURAND_ORDERING_PSEUDO_SEEDED = 102, ///< Specific seeding pattern for fast lower quality pseudorandom results
145
+ CURAND_ORDERING_PSEUDO_LEGACY = 103, ///< Specific legacy sequence for pseudorandom results, guaranteed to remain the same for all cuRAND releases
146
+ CURAND_ORDERING_PSEUDO_DYNAMIC = 104, ///< Specific ordering adjusted to the device it is being executed on, provides the best performance
147
+ CURAND_ORDERING_QUASI_DEFAULT = 201 ///< Specific n-dimensional ordering for quasirandom results
148
+ };
149
+
150
+ /*
151
+ * CURAND ordering of results in memory
152
+ */
153
+ /** \cond UNHIDE_TYPEDEFS */
154
+ typedef enum curandOrdering curandOrdering_t;
155
+ /** \endcond */
156
+
157
+ /**
158
+ * CURAND choice of direction vector set
159
+ */
160
+ enum curandDirectionVectorSet {
161
+ CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
162
+ CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
163
+ CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 103, ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
164
+ CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104 ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
165
+ };
166
+
167
+ /*
168
+ * CURAND choice of direction vector set
169
+ */
170
+ /** \cond UNHIDE_TYPEDEFS */
171
+ typedef enum curandDirectionVectorSet curandDirectionVectorSet_t;
172
+ /** \endcond */
173
+
174
+ /**
175
+ * CURAND array of 32-bit direction vectors
176
+ */
177
+ /** \cond UNHIDE_TYPEDEFS */
178
+ typedef unsigned int curandDirectionVectors32_t[32];
179
+ /** \endcond */
180
+
181
+ /**
182
+ * CURAND array of 64-bit direction vectors
183
+ */
184
+ /** \cond UNHIDE_TYPEDEFS */
185
+ typedef unsigned long long curandDirectionVectors64_t[64];
186
+ /** \endcond **/
187
+
188
+ /**
189
+ * CURAND generator (opaque)
190
+ */
191
+ struct curandGenerator_st;
192
+
193
+ /**
194
+ * CURAND generator
195
+ */
196
+ /** \cond UNHIDE_TYPEDEFS */
197
+ typedef struct curandGenerator_st *curandGenerator_t;
198
+ /** \endcond */
199
+
200
+ /**
201
+ * CURAND distribution
202
+ */
203
+ /** \cond UNHIDE_TYPEDEFS */
204
+ typedef double curandDistribution_st;
205
+ typedef curandDistribution_st *curandDistribution_t;
206
+ typedef struct curandDistributionShift_st *curandDistributionShift_t;
207
+ /** \endcond */
208
+ /**
209
+ * CURAND distribution M2
210
+ */
211
+ /** \cond UNHIDE_TYPEDEFS */
212
+ typedef struct curandDistributionM2Shift_st *curandDistributionM2Shift_t;
213
+ typedef struct curandHistogramM2_st *curandHistogramM2_t;
214
+ typedef unsigned int curandHistogramM2K_st;
215
+ typedef curandHistogramM2K_st *curandHistogramM2K_t;
216
+ typedef curandDistribution_st curandHistogramM2V_st;
217
+ typedef curandHistogramM2V_st *curandHistogramM2V_t;
218
+
219
+ typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t;
220
+ /** \endcond */
221
+
222
+ /*
223
+ * CURAND METHOD
224
+ */
225
+ /** \cond UNHIDE_ENUMS */
226
+ enum curandMethod {
227
+ CURAND_CHOOSE_BEST = 0, // choose best depends on args
228
+ CURAND_ITR = 1,
229
+ CURAND_KNUTH = 2,
230
+ CURAND_HITR = 3,
231
+ CURAND_M1 = 4,
232
+ CURAND_M2 = 5,
233
+ CURAND_BINARY_SEARCH = 6,
234
+ CURAND_DISCRETE_GAUSS = 7,
235
+ CURAND_REJECTION = 8,
236
+ CURAND_DEVICE_API = 9,
237
+ CURAND_FAST_REJECTION = 10,
238
+ CURAND_3RD = 11,
239
+ CURAND_DEFINITION = 12,
240
+ CURAND_POISSON = 13
241
+ };
242
+
243
+ typedef enum curandMethod curandMethod_t;
244
+ /** \endcond */
245
+
246
+
247
+ #ifndef __CUDACC_RTC__
248
+
249
+ /**
250
+ * @}
251
+ */
252
+
253
+ /**
254
+ * \brief Create new random number generator.
255
+ *
256
+ * Creates a new random number generator of type \p rng_type
257
+ * and returns it in \p *generator.
258
+ *
259
+ * Legal values for \p rng_type are:
260
+ * - CURAND_RNG_PSEUDO_DEFAULT
261
+ * - CURAND_RNG_PSEUDO_XORWOW
262
+ * - CURAND_RNG_PSEUDO_MRG32K3A
263
+ * - CURAND_RNG_PSEUDO_MTGP32
264
+ * - CURAND_RNG_PSEUDO_MT19937
265
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
266
+ * - CURAND_RNG_QUASI_DEFAULT
267
+ * - CURAND_RNG_QUASI_SOBOL32
268
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32
269
+ * - CURAND_RNG_QUASI_SOBOL64
270
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
271
+ *
272
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
273
+ * is CURAND_RNG_PSEUDO_XORWOW. \n
274
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
275
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
276
+ *
277
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
278
+ * - \p seed = 0
279
+ * - \p offset = 0
280
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
281
+ *
282
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
283
+ * - \p seed = 0
284
+ * - \p offset = 0
285
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
286
+ *
287
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
288
+ * - \p seed = 0
289
+ * - \p offset = 0
290
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
291
+ *
292
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
293
+ * - \p seed = 0
294
+ * - \p offset = 0
295
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
296
+ *
297
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
298
+ * - \p seed = 0
299
+ * - \p offset = 0
300
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
301
+ *
302
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
303
+ * - \p dimensions = 1
304
+ * - \p offset = 0
305
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
306
+ *
307
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
308
+ * - \p dimensions = 1
309
+ * - \p offset = 0
310
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
311
+ *
312
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are:
313
+ * - \p dimensions = 1
314
+ * - \p offset = 0
315
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
316
+ *
317
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
318
+ * - \p dimensions = 1
319
+ * - \p offset = 0
320
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
321
+ *
322
+ * \param generator - Pointer to generator
323
+ * \param rng_type - Type of generator to create
324
+ *
325
+ * \return
326
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
327
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
328
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
329
+ * dynamically linked library version \n
330
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
331
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
332
+ *
333
+ */
334
+ curandStatus_t CURANDAPI
335
+ curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type);
336
+
337
+ /**
338
+ * \brief Create new host CPU random number generator.
339
+ *
340
+ * Creates a new host CPU random number generator of type \p rng_type
341
+ * and returns it in \p *generator.
342
+ *
343
+ * Legal values for \p rng_type are:
344
+ * - CURAND_RNG_PSEUDO_DEFAULT
345
+ * - CURAND_RNG_PSEUDO_XORWOW
346
+ * - CURAND_RNG_PSEUDO_MRG32K3A
347
+ * - CURAND_RNG_PSEUDO_MTGP32
348
+ * - CURAND_RNG_PSEUDO_MT19937
349
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
350
+ * - CURAND_RNG_QUASI_DEFAULT
351
+ * - CURAND_RNG_QUASI_SOBOL32
352
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32
353
+ * - CURAND_RNG_QUASI_SOBOL64
354
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
355
+ *
356
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
357
+ * is CURAND_RNG_PSEUDO_XORWOW. \n
358
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
359
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
360
+ *
361
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
362
+ * - \p seed = 0
363
+ * - \p offset = 0
364
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
365
+ *
366
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
367
+ * - \p seed = 0
368
+ * - \p offset = 0
369
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
370
+ *
371
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
372
+ * - \p seed = 0
373
+ * - \p offset = 0
374
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
375
+ *
376
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
377
+ * - \p seed = 0
378
+ * - \p offset = 0
379
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
380
+ *
381
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
382
+ * - \p seed = 0
383
+ * - \p offset = 0
384
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
385
+ *
386
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
387
+ * - \p dimensions = 1
388
+ * - \p offset = 0
389
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
390
+ *
391
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
392
+ * - \p dimensions = 1
393
+ * - \p offset = 0
394
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
395
+ *
396
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are:
397
+ * - \p dimensions = 1
398
+ * - \p offset = 0
399
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
400
+ *
401
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
402
+ * - \p dimensions = 1
403
+ * - \p offset = 0
404
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
405
+ *
406
+ * \param generator - Pointer to generator
407
+ * \param rng_type - Type of generator to create
408
+ *
409
+ * \return
410
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
411
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
412
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
413
+ * dynamically linked library version \n
414
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
415
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
416
+ */
417
+ curandStatus_t CURANDAPI
418
+ curandCreateGeneratorHost(curandGenerator_t *generator, curandRngType_t rng_type);
419
+
420
+ /**
421
+ * \brief Destroy an existing generator.
422
+ *
423
+ * Destroy an existing generator and free all memory associated with its state.
424
+ *
425
+ * \param generator - Generator to destroy
426
+ *
427
+ * \return
428
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
429
+ * - CURAND_STATUS_SUCCESS if generator was destroyed successfully \n
430
+ */
431
+ curandStatus_t CURANDAPI
432
+ curandDestroyGenerator(curandGenerator_t generator);
433
+
434
+ /**
435
+ * \brief Return the version number of the library.
436
+ *
437
+ * Return in \p *version the version number of the dynamically linked CURAND
438
+ * library. The format is the same as CUDART_VERSION from the CUDA Runtime.
439
+ * The only supported configuration is CURAND version equal to CUDA Runtime
440
+ * version.
441
+ *
442
+ * \param version - CURAND library version
443
+ *
444
+ * \return
445
+ * - CURAND_STATUS_SUCCESS if the version number was successfully returned \n
446
+ */
447
+ curandStatus_t CURANDAPI
448
+ curandGetVersion(int *version);
449
+
450
+ /**
451
+ * \brief Return the value of the curand property.
452
+ *
453
+ * Return in \p *value the number for the property described by \p type of the
454
+ * dynamically linked CURAND library.
455
+ *
456
+ * \param type - CUDA library property
457
+ * \param value - integer value for the requested property
458
+ *
459
+ * \return
460
+ * - CURAND_STATUS_SUCCESS if the property value was successfully returned \n
461
+ * - CURAND_STATUS_OUT_OF_RANGE if the property type is not recognized \n
462
+ */
463
+ curandStatus_t CURANDAPI
464
+ curandGetProperty(libraryPropertyType type, int *value);
465
+
466
+
467
+ /**
468
+ * \brief Set the current stream for CURAND kernel launches.
469
+ *
470
+ * Set the current stream for CURAND kernel launches. All library functions
471
+ * will use this stream until set again.
472
+ *
473
+ * \param generator - Generator to modify
474
+ * \param stream - Stream to use or ::NULL for null stream
475
+ *
476
+ * \return
477
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
478
+ * - CURAND_STATUS_SUCCESS if stream was set successfully \n
479
+ */
480
+ curandStatus_t CURANDAPI
481
+ curandSetStream(curandGenerator_t generator, cudaStream_t stream);
482
+
483
+ /**
484
+ * \brief Set the seed value of the pseudo-random number generator.
485
+ *
486
+ * Set the seed value of the pseudorandom number generator.
487
+ * All values of seed are valid. Different seeds will produce different sequences.
488
+ * Different seeds will often not be statistically correlated with each other,
489
+ * but some pairs of seed values may generate sequences which are statistically correlated.
490
+ *
491
+ * \param generator - Generator to modify
492
+ * \param seed - Seed value
493
+ *
494
+ * \return
495
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
496
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a pseudorandom number generator \n
497
+ * - CURAND_STATUS_SUCCESS if generator seed was set successfully \n
498
+ */
499
+ curandStatus_t CURANDAPI
500
+ curandSetPseudoRandomGeneratorSeed(curandGenerator_t generator, unsigned long long seed);
501
+
502
+ /**
503
+ * \brief Set the absolute offset of the pseudo or quasirandom number generator.
504
+ *
505
+ * Set the absolute offset of the pseudo or quasirandom number generator.
506
+ *
507
+ * All values of offset are valid. The offset position is absolute, not
508
+ * relative to the current position in the sequence.
509
+ *
510
+ * \param generator - Generator to modify
511
+ * \param offset - Absolute offset position
512
+ *
513
+ * \return
514
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
515
+ * - CURAND_STATUS_SUCCESS if generator offset was set successfully \n
516
+ */
517
+ curandStatus_t CURANDAPI
518
+ curandSetGeneratorOffset(curandGenerator_t generator, unsigned long long offset);
519
+
520
+ /**
521
+ * \brief Set the ordering of results of the pseudo or quasirandom number generator.
522
+ *
523
+ * Set the ordering of results of the pseudo or quasirandom number generator.
524
+ *
525
+ * Legal values of \p order for pseudorandom generators are:
526
+ * - CURAND_ORDERING_PSEUDO_DEFAULT
527
+ * - CURAND_ORDERING_PSEUDO_BEST
528
+ * - CURAND_ORDERING_PSEUDO_SEEDED
529
+ * - CURAND_ORDERING_PSEUDO_LEGACY
530
+ *
531
+ * Legal values of \p order for quasirandom generators are:
532
+ * - CURAND_ORDERING_QUASI_DEFAULT
533
+ *
534
+ * \param generator - Generator to modify
535
+ * \param order - Ordering of results
536
+ *
537
+ * \return
538
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
539
+ * - CURAND_STATUS_OUT_OF_RANGE if the ordering is not valid \n
540
+ * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n
541
+ */
542
+ curandStatus_t CURANDAPI
543
+ curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order);
544
+
545
+ /**
546
+ * \brief Set the number of dimensions.
547
+ *
548
+ * Set the number of dimensions to be generated by the quasirandom number
549
+ * generator.
550
+ *
551
+ * Legal values for \p num_dimensions are 1 to 20000.
552
+ *
553
+ * \param generator - Generator to modify
554
+ * \param num_dimensions - Number of dimensions
555
+ *
556
+ * \return
557
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
558
+ * - CURAND_STATUS_OUT_OF_RANGE if num_dimensions is not valid \n
559
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a quasirandom number generator \n
560
+ * - CURAND_STATUS_SUCCESS if the number of dimensions was set successfully \n
561
+ */
562
+ curandStatus_t CURANDAPI
563
+ curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions);
564
+
565
+ /**
566
+ * \brief Generate 32-bit pseudo or quasirandom numbers.
567
+ *
568
+ * Use \p generator to generate \p num 32-bit results into the device memory at
569
+ * \p outputPtr. The device memory must have been previously allocated and be
570
+ * large enough to hold all the results. Launches are done with the stream
571
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
572
+ *
573
+ * Results are 32-bit values with every bit random.
574
+ *
575
+ * \param generator - Generator to use
576
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
577
+ * Pointer to host memory to store CPU-generated results
578
+ * \param num - Number of random 32-bit values to generate
579
+ *
580
+ * \return
581
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
582
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
583
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
584
+ * a previous kernel launch \n
585
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
586
+ * not a multiple of the quasirandom dimension \n
587
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
588
+ * - CURAND_STATUS_TYPE_ERROR if the generator is a 64 bit quasirandom generator.
589
+ * (use ::curandGenerateLongLong() with 64 bit quasirandom generators)
590
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
591
+ */
592
+ curandStatus_t CURANDAPI
593
+ curandGenerate(curandGenerator_t generator, unsigned int *outputPtr, size_t num);
594
+
595
+ /**
596
+ * \brief Generate 64-bit quasirandom numbers.
597
+ *
598
+ * Use \p generator to generate \p num 64-bit results into the device memory at
599
+ * \p outputPtr. The device memory must have been previously allocated and be
600
+ * large enough to hold all the results. Launches are done with the stream
601
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
602
+ *
603
+ * Results are 64-bit values with every bit random.
604
+ *
605
+ * \param generator - Generator to use
606
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
607
+ * Pointer to host memory to store CPU-generated results
608
+ * \param num - Number of random 64-bit values to generate
609
+ *
610
+ * \return
611
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
612
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
613
+ * a previous kernel launch \n
614
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
615
+ * not a multiple of the quasirandom dimension \n
616
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
617
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a 64 bit quasirandom generator\n
618
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
619
+ */
620
+ curandStatus_t CURANDAPI
621
+ curandGenerateLongLong(curandGenerator_t generator, unsigned long long *outputPtr, size_t num);
622
+
623
+ /**
624
+ * \brief Generate uniformly distributed floats.
625
+ *
626
+ * Use \p generator to generate \p num float results into the device memory at
627
+ * \p outputPtr. The device memory must have been previously allocated and be
628
+ * large enough to hold all the results. Launches are done with the stream
629
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
630
+ *
631
+ * Results are 32-bit floating point values between \p 0.0f and \p 1.0f,
632
+ * excluding \p 0.0f and including \p 1.0f.
633
+ *
634
+ * \param generator - Generator to use
635
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
636
+ * Pointer to host memory to store CPU-generated results
637
+ * \param num - Number of floats to generate
638
+ *
639
+ * \return
640
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
641
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
642
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
643
+ * a previous kernel launch \n
644
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
645
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
646
+ * not a multiple of the quasirandom dimension \n
647
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
648
+ */
649
+ curandStatus_t CURANDAPI
650
+ curandGenerateUniform(curandGenerator_t generator, float *outputPtr, size_t num);
651
+
652
+ /**
653
+ * \brief Generate uniformly distributed doubles.
654
+ *
655
+ * Use \p generator to generate \p num double results into the device memory at
656
+ * \p outputPtr. The device memory must have been previously allocated and be
657
+ * large enough to hold all the results. Launches are done with the stream
658
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
659
+ *
660
+ * Results are 64-bit double precision floating point values between
661
+ * \p 0.0 and \p 1.0, excluding \p 0.0 and including \p 1.0.
662
+ *
663
+ * \param generator - Generator to use
664
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
665
+ * Pointer to host memory to store CPU-generated results
666
+ * \param num - Number of doubles to generate
667
+ *
668
+ * \return
669
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
670
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
671
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
672
+ * a previous kernel launch \n
673
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
674
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
675
+ * not a multiple of the quasirandom dimension \n
676
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
677
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
678
+ */
679
+ curandStatus_t CURANDAPI
680
+ curandGenerateUniformDouble(curandGenerator_t generator, double *outputPtr, size_t num);
681
+
682
+ /**
683
+ * \brief Generate normally distributed floats.
684
+ *
685
+ * Use \p generator to generate \p n float results into the device memory at
686
+ * \p outputPtr. The device memory must have been previously allocated and be
687
+ * large enough to hold all the results. Launches are done with the stream
688
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
689
+ *
690
+ * Results are 32-bit floating point values with mean \p mean and standard
691
+ * deviation \p stddev.
692
+ *
693
+ * Normally distributed results are generated from pseudorandom generators
694
+ * with a Box-Muller transform, and so require \p n to be even.
695
+ * Quasirandom generators use an inverse cumulative distribution
696
+ * function to preserve dimensionality.
697
+ *
698
+ * There may be slight numerical differences between results generated
699
+ * on the GPU with generators created with ::curandCreateGenerator()
700
+ * and results calculated on the CPU with generators created with
701
+ * ::curandCreateGeneratorHost(). These differences arise because of
702
+ * differences in results for transcendental functions. In addition,
703
+ * future versions of CURAND may use newer versions of the CUDA math
704
+ * library, so different versions of CURAND may give slightly different
705
+ * numerical values.
706
+ *
707
+ * \param generator - Generator to use
708
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
709
+ * Pointer to host memory to store CPU-generated results
710
+ * \param n - Number of floats to generate
711
+ * \param mean - Mean of normal distribution
712
+ * \param stddev - Standard deviation of normal distribution
713
+ *
714
+ * \return
715
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
716
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
717
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
718
+ * a previous kernel launch \n
719
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
720
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
721
+ * not a multiple of the quasirandom dimension, or is not a multiple
722
+ * of two for pseudorandom generators \n
723
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
724
+ */
725
+ curandStatus_t CURANDAPI
726
+ curandGenerateNormal(curandGenerator_t generator, float *outputPtr,
727
+ size_t n, float mean, float stddev);
728
+
729
+ /**
730
+ * \brief Generate normally distributed doubles.
731
+ *
732
+ * Use \p generator to generate \p n double results into the device memory at
733
+ * \p outputPtr. The device memory must have been previously allocated and be
734
+ * large enough to hold all the results. Launches are done with the stream
735
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
736
+ *
737
+ * Results are 64-bit floating point values with mean \p mean and standard
738
+ * deviation \p stddev.
739
+ *
740
+ * Normally distributed results are generated from pseudorandom generators
741
+ * with a Box-Muller transform, and so require \p n to be even.
742
+ * Quasirandom generators use an inverse cumulative distribution
743
+ * function to preserve dimensionality.
744
+ *
745
+ * There may be slight numerical differences between results generated
746
+ * on the GPU with generators created with ::curandCreateGenerator()
747
+ * and results calculated on the CPU with generators created with
748
+ * ::curandCreateGeneratorHost(). These differences arise because of
749
+ * differences in results for transcendental functions. In addition,
750
+ * future versions of CURAND may use newer versions of the CUDA math
751
+ * library, so different versions of CURAND may give slightly different
752
+ * numerical values.
753
+ *
754
+ * \param generator - Generator to use
755
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
756
+ * Pointer to host memory to store CPU-generated results
757
+ * \param n - Number of doubles to generate
758
+ * \param mean - Mean of normal distribution
759
+ * \param stddev - Standard deviation of normal distribution
760
+ *
761
+ * \return
762
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
763
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
764
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
765
+ * a previous kernel launch \n
766
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
767
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
768
+ * not a multiple of the quasirandom dimension, or is not a multiple
769
+ * of two for pseudorandom generators \n
770
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
771
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
772
+ */
773
+ curandStatus_t CURANDAPI
774
+ curandGenerateNormalDouble(curandGenerator_t generator, double *outputPtr,
775
+ size_t n, double mean, double stddev);
776
+
777
+ /**
778
+ * \brief Generate log-normally distributed floats.
779
+ *
780
+ * Use \p generator to generate \p n float results into the device memory at
781
+ * \p outputPtr. The device memory must have been previously allocated and be
782
+ * large enough to hold all the results. Launches are done with the stream
783
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
784
+ *
785
+ * Results are 32-bit floating point values with log-normal distribution based on
786
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
787
+ *
788
+ * Normally distributed results are generated from pseudorandom generators
789
+ * with a Box-Muller transform, and so require \p n to be even.
790
+ * Quasirandom generators use an inverse cumulative distribution
791
+ * function to preserve dimensionality.
792
+ * The normally distributed results are transformed into log-normal distribution.
793
+ *
794
+ * There may be slight numerical differences between results generated
795
+ * on the GPU with generators created with ::curandCreateGenerator()
796
+ * and results calculated on the CPU with generators created with
797
+ * ::curandCreateGeneratorHost(). These differences arise because of
798
+ * differences in results for transcendental functions. In addition,
799
+ * future versions of CURAND may use newer versions of the CUDA math
800
+ * library, so different versions of CURAND may give slightly different
801
+ * numerical values.
802
+ *
803
+ * \param generator - Generator to use
804
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
805
+ * Pointer to host memory to store CPU-generated results
806
+ * \param n - Number of floats to generate
807
+ * \param mean - Mean of associated normal distribution
808
+ * \param stddev - Standard deviation of associated normal distribution
809
+ *
810
+ * \return
811
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
812
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
813
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
814
+ * a previous kernel launch \n
815
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
816
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
817
+ * not a multiple of the quasirandom dimension, or is not a multiple
818
+ * of two for pseudorandom generators \n
819
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
820
+ */
821
+ curandStatus_t CURANDAPI
822
+ curandGenerateLogNormal(curandGenerator_t generator, float *outputPtr,
823
+ size_t n, float mean, float stddev);
824
+
825
+ /**
826
+ * \brief Generate log-normally distributed doubles.
827
+ *
828
+ * Use \p generator to generate \p n double results into the device memory at
829
+ * \p outputPtr. The device memory must have been previously allocated and be
830
+ * large enough to hold all the results. Launches are done with the stream
831
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
832
+ *
833
+ * Results are 64-bit floating point values with log-normal distribution based on
834
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
835
+ *
836
+ * Normally distributed results are generated from pseudorandom generators
837
+ * with a Box-Muller transform, and so require \p n to be even.
838
+ * Quasirandom generators use an inverse cumulative distribution
839
+ * function to preserve dimensionality.
840
+ * The normally distributed results are transformed into log-normal distribution.
841
+ *
842
+ * There may be slight numerical differences between results generated
843
+ * on the GPU with generators created with ::curandCreateGenerator()
844
+ * and results calculated on the CPU with generators created with
845
+ * ::curandCreateGeneratorHost(). These differences arise because of
846
+ * differences in results for transcendental functions. In addition,
847
+ * future versions of CURAND may use newer versions of the CUDA math
848
+ * library, so different versions of CURAND may give slightly different
849
+ * numerical values.
850
+ *
851
+ * \param generator - Generator to use
852
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
853
+ * Pointer to host memory to store CPU-generated results
854
+ * \param n - Number of doubles to generate
855
+ * \param mean - Mean of associated normal distribution
855
+ * \param stddev - Standard deviation of associated normal distribution
857
+ *
858
+ * \return
859
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
860
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
861
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
862
+ * a previous kernel launch \n
863
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
864
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
865
+ * not a multiple of the quasirandom dimension, or is not a multiple
866
+ * of two for pseudorandom generators \n
867
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
868
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
869
+ */
870
+ curandStatus_t CURANDAPI
871
+ curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr,
872
+ size_t n, double mean, double stddev);
873
+
874
+ /**
875
+ * \brief Construct the histogram array for a Poisson distribution.
876
+ *
877
+ * Construct the histogram array for the Poisson distribution with lambda \p lambda.
878
+ * For lambda greater than 2000, an approximation with a normal distribution is used.
879
+ *
880
+ * \param lambda - lambda for the Poisson distribution
881
+ *
882
+ *
883
+ * \param discrete_distribution - pointer to the histogram in device memory
884
+ *
885
+ * \return
886
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
887
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
888
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
889
+ * - CURAND_STATUS_NOT_INITIALIZED if the distribution pointer was null \n
890
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
891
+ * a previous kernel launch \n
892
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
893
+ * - CURAND_STATUS_SUCCESS if the histogram was generated successfully \n
894
+ */
895
+
896
+ curandStatus_t CURANDAPI
897
+ curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution);
898
+
899
+
900
+
901
+ /**
902
+ * \brief Destroy the histogram array for a discrete distribution (e.g. Poisson).
903
+ *
904
+ * Destroy the histogram array for a discrete distribution created by curandCreatePoissonDistribution.
905
+ *
906
+ * \param discrete_distribution - pointer to device memory where the histogram is stored
907
+ *
908
+ * \return
909
+ * - CURAND_STATUS_NOT_INITIALIZED if the histogram was never created \n
910
+ * - CURAND_STATUS_SUCCESS if the histogram was destroyed successfully \n
911
+ */
912
+ curandStatus_t CURANDAPI
913
+ curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution);
914
+
915
+
916
+ /**
917
+ * \brief Generate Poisson-distributed unsigned ints.
918
+ *
919
+ * Use \p generator to generate \p n unsigned int results into device memory at
920
+ * \p outputPtr. The device memory must have been previously allocated and must be
921
+ * large enough to hold all the results. Launches are done with the stream
922
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
923
+ *
924
+ * Results are 32-bit unsigned int values with Poisson distribution, with lambda \p lambda.
925
+ *
926
+ * \param generator - Generator to use
927
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
928
+ * Pointer to host memory to store CPU-generated results
929
+ * \param n - Number of unsigned ints to generate
930
+ * \param lambda - lambda for the Poisson distribution
931
+ *
932
+ * \return
933
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
934
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
935
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
936
+ * a previous kernel launch \n
937
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
938
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
939
+ * not a multiple of the quasirandom dimension\n
940
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU or sm does not support double precision \n
941
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
942
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
943
+ */
944
+
945
+ curandStatus_t CURANDAPI
946
+ curandGeneratePoisson(curandGenerator_t generator, unsigned int *outputPtr,
947
+ size_t n, double lambda);
948
+ // just for internal usage
949
+ curandStatus_t CURANDAPI
950
+ curandGeneratePoissonMethod(curandGenerator_t generator, unsigned int *outputPtr,
951
+ size_t n, double lambda, curandMethod_t method);
952
+
953
+
954
+ curandStatus_t CURANDAPI
955
+ curandGenerateBinomial(curandGenerator_t generator, unsigned int *outputPtr,
956
+ size_t num, unsigned int n, double p);
957
+ // just for internal usage
958
+ curandStatus_t CURANDAPI
959
+ curandGenerateBinomialMethod(curandGenerator_t generator,
960
+ unsigned int *outputPtr,
961
+ size_t num, unsigned int n, double p,
962
+ curandMethod_t method);
963
+
964
+
965
+ /**
966
+ * \brief Setup starting states.
967
+ *
968
+ * Generate the starting state of the generator. This function is
969
+ * automatically called by generation functions such as
970
+ * ::curandGenerate() and ::curandGenerateUniform().
971
+ * It can be called manually for performance testing reasons to separate
972
+ * timings for starting state generation and random number generation.
973
+ *
974
+ * \param generator - Generator to update
975
+ *
976
+ * \return
977
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
978
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
979
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
980
+ * a previous kernel launch \n
981
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
982
+ * - CURAND_STATUS_SUCCESS if the seeds were generated successfully \n
983
+ */
984
+ curandStatus_t CURANDAPI
985
+ curandGenerateSeeds(curandGenerator_t generator);
986
+
987
+ /**
988
+ * \brief Get direction vectors for 32-bit quasirandom number generation.
989
+ *
990
+ * Get a pointer to an array of direction vectors that can be used
991
+ * for quasirandom number generation. The resulting pointer will
992
+ * reference an array of direction vectors in host memory.
993
+ *
994
+ * The array contains vectors for many dimensions. Each dimension
995
+ * has 32 vectors. Each individual vector is an unsigned int.
996
+ *
997
+ * Legal values for \p set are:
998
+ * - CURAND_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
999
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
1000
+ *
1001
+ * \param vectors - Address of pointer in which to return direction vectors
1002
+ * \param set - Which set of direction vectors to use
1003
+ *
1004
+ * \return
1005
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
1006
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1007
+ */
1008
+ curandStatus_t CURANDAPI
1009
+ curandGetDirectionVectors32(curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set);
1010
+
1011
+ /**
1012
+ * \brief Get scramble constants for 32-bit scrambled Sobol' generators.
1013
+ *
1014
+ * Get a pointer to an array of scramble constants that can be used
1015
+ * for quasirandom number generation. The resulting pointer will
1016
+ * reference an array of unsigned ints in host memory.
1017
+ *
1018
+ * The array contains constants for many dimensions. Each dimension
1019
+ * has a single unsigned int constant.
1020
+ *
1021
+ * \param constants - Address of pointer in which to return scramble constants
1022
+ *
1023
+ * \return
1024
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1025
+ */
1026
+ curandStatus_t CURANDAPI
1027
+ curandGetScrambleConstants32(unsigned int * * constants);
1028
+
1029
+ /**
1030
+ * \brief Get direction vectors for 64-bit quasirandom number generation.
1031
+ *
1032
+ * Get a pointer to an array of direction vectors that can be used
1033
+ * for quasirandom number generation. The resulting pointer will
1034
+ * reference an array of direction vectors in host memory.
1035
+ *
1036
+ * The array contains vectors for many dimensions. Each dimension
1037
+ * has 64 vectors. Each individual vector is an unsigned long long.
1038
+ *
1039
+ * Legal values for \p set are:
1040
+ * - CURAND_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
1041
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
1042
+ *
1043
+ * \param vectors - Address of pointer in which to return direction vectors
1044
+ * \param set - Which set of direction vectors to use
1045
+ *
1046
+ * \return
1047
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
1048
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1049
+ */
1050
+ curandStatus_t CURANDAPI
1051
+ curandGetDirectionVectors64(curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set);
1052
+
1053
+ /**
1054
+ * \brief Get scramble constants for 64-bit scrambled Sobol' generators.
1055
+ *
1056
+ * Get a pointer to an array of scramble constants that can be used
1057
+ * for quasirandom number generation. The resulting pointer will
1058
+ * reference an array of unsigned long longs in host memory.
1059
+ *
1060
+ * The array contains constants for many dimensions. Each dimension
1061
+ * has a single unsigned long long constant.
1062
+ *
1063
+ * \param constants - Address of pointer in which to return scramble constants
1064
+ *
1065
+ * \return
1066
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
1067
+ */
1068
+ curandStatus_t CURANDAPI
1069
+ curandGetScrambleConstants64(unsigned long long * * constants);
1070
+
1071
+ /** @} */
1072
+
1073
+ #endif // __CUDACC_RTC__
1074
+
1075
+ #if defined(__cplusplus)
1076
+ }
1077
+ #endif /* __cplusplus */
1078
+
1079
+
1080
+ #endif /* !defined(CURAND_H_) */