BryanW commited on Mar 23

Commit

2ce59c4

verified ·

1 Parent(s): 5e74fae

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_lognormal.h +698 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_mtgp32_host.h +516 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExt.h +1668 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCounters.h +311 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCuda.h +164 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCudaRt.h +139 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMem.h +749 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMemCudaRt.h +217 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtOpenCL.h +213 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayload.h +1478 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayloadHelper.h +192 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsCounters.h +132 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsScope.h +50 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsTime.h +49 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSync.h +406 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtx3.hpp +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtHelperMacros.h +64 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImpl.h +123 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplCounters_v1.h +166 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMemCudaRt_v1.h +72 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMem_v1.h +168 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplPayload_v1.h +265 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtInit.h +437 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadHelperInternal.h +294 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadTypeInfo.h +189 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtTypes.h +66 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImpl.h +464 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCore.h +432 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h +128 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h +156 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h +239 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h +124 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInit.h +468 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDecls.h +103 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDefs.h +595 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxLinkOnce.h +88 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxTypes.h +318 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libcufile_rdma.so.1 +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libnvtx3interop.so.1 +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/__pycache__/__init__.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__init__.py +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__pycache__/__init__.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h +740 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__init__.py +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__pycache__/__init__.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/libcufile_rdma.so.1 +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/__pycache__/__init__.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__init__.py +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__pycache__/__init__.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/curand.h +1080 -0

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_lognormal.h ADDED Viewed

	@@ -0,0 +1,698 @@

+ /* Copyright 2010-2014 NVIDIA Corporation.  All rights reserved.
+  *
+  * NOTICE TO LICENSEE:
+  *
+  * The source code and/or documentation ("Licensed Deliverables") are
+  * subject to NVIDIA intellectual property rights under U.S. and
+  * international Copyright laws.
+  *
+  * The Licensed Deliverables contained herein are PROPRIETARY and
+  * CONFIDENTIAL to NVIDIA and are being provided under the terms and
+  * conditions of a form of NVIDIA software license agreement by and
+  * between NVIDIA and Licensee ("License Agreement") or electronically
+  * accepted by Licensee.  Notwithstanding any terms or conditions to
+  * the contrary in the License Agreement, reproduction or disclosure
+  * of the Licensed Deliverables to any third party without the express
+  * written consent of NVIDIA is prohibited.
+  *
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE
+  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+  * OF THESE LICENSED DELIVERABLES.
+  *
+  * U.S. Government End Users.  These Licensed Deliverables are a
+  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+  * 1995), consisting of "commercial computer software" and "commercial
+  * computer software documentation" as such terms are used in 48
+  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
+  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+  * U.S. Government End Users acquire the Licensed Deliverables with
+  * only those rights set forth herein.
+  *
+  * Any use of the Licensed Deliverables in individual and commercial
+  * software must include, in the user documentation and internal
+  * comments to the code, the above Disclaimer and U.S. Government End
+  * Users Notice.
+  */
+#if !defined(CURAND_LOGNORMAL_H_)
+#define CURAND_LOGNORMAL_H_
+/**
+ * \defgroup DEVICE Device API
+ *
+ * @{
+ */
+#ifndef __CUDACC_RTC__
+#include <math.h>
+#endif // __CUDACC_RTC__
+#include "curand_mrg32k3a.h"
+#include "curand_mtgp32_kernel.h"
+#include "curand_philox4x32_x.h"
+/**
+ * \brief Return a log-normally distributed float from an XORWOW generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the XORWOW generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, transforms them to log-normal distribution,
+ * then returns them one at a time.
+ * See ::curand_log_normal2() for a more efficient version that returns
+ * both results at once.
+ *
+ * \param state  - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateXORWOW_t *state, float mean, float stddev)
+{
+    /* The flag marks a stored log-normal sample: return it and clear the flag. */
+    if(state->boxmuller_flag == EXTRA_FLAG_LOG_NORMAL) {
+        state->boxmuller_flag = 0;
+        return state->boxmuller_extra;
+    }
+    /* Nothing stored: draw two values and feed them to the Box-Muller helper. */
+    unsigned int first = curand(state);
+    unsigned int second = curand(state);
+    float2 normals = _curand_box_muller(first, second);
+    /* Store the .y-derived sample for the next call, return the .x-derived one. */
+    state->boxmuller_extra = expf(mean + (stddev * normals.y));
+    state->boxmuller_flag = EXTRA_FLAG_LOG_NORMAL;
+    return expf(mean + (stddev * normals.x));
+}
+/**
+ * \brief Return a log-normally distributed float from a Philox4_32_10 generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Philox4_32_10 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, transforms them to log-normal distribution,
+ * then returns them one at a time.
+ * See ::curand_log_normal2() for a more efficient version that returns
+ * both results at once.
+ *
+ * \param state  - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStatePhilox4_32_10_t *state, float mean, float stddev)
+{
+    /* The flag marks a stored log-normal sample: return it and clear the flag. */
+    if(state->boxmuller_flag == EXTRA_FLAG_LOG_NORMAL) {
+        state->boxmuller_flag = 0;
+        return state->boxmuller_extra;
+    }
+    /* Nothing stored: draw two values and feed them to the Box-Muller helper. */
+    unsigned int first = curand(state);
+    unsigned int second = curand(state);
+    float2 normals = _curand_box_muller(first, second);
+    /* Store the .y-derived sample for the next call, return the .x-derived one. */
+    state->boxmuller_extra = expf(mean + (stddev * normals.y));
+    state->boxmuller_flag = EXTRA_FLAG_LOG_NORMAL;
+    return expf(mean + (stddev * normals.x));
+}
+/**
+ * \brief Return two log-normally distributed floats from an XORWOW generator.
+ *
+ * Return two log-normally distributed floats derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the XORWOW generator in \p state,
+ * increment position of generator by two.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, then transforms them to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float2 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float2 curand_log_normal2(curandStateXORWOW_t *state, float mean, float stddev)
+{
+    /* Pair produced by the Box-Muller helper for this generator. */
+    float2 normals = curand_box_muller(state);
+    /* Scale, shift and exponentiate each component independently. */
+    float2 result;
+    result.x = expf(mean + (stddev * normals.x));
+    result.y = expf(mean + (stddev * normals.y));
+    return result;
+}
+/**
+ * \brief Return two log-normally distributed floats from a Philox4_32_10 generator.
+ *
+ * Return two log-normally distributed floats derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Philox4_32_10 generator in \p state,
+ * increment position of generator by two.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, then transforms them to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float2 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float2 curand_log_normal2(curandStatePhilox4_32_10_t *state, float mean, float stddev)
+{
+    /* Pair produced by the Box-Muller helper for this generator. */
+    float2 normals = curand_box_muller(state);
+    /* Scale, shift and exponentiate each component independently. */
+    float2 result;
+    result.x = expf(mean + (stddev * normals.x));
+    result.y = expf(mean + (stddev * normals.y));
+    return result;
+}
+/**
+ * \brief Return four log-normally distributed floats from a Philox4_32_10 generator.
+ *
+ * Return four log-normally distributed floats derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Philox4_32_10 generator in \p state,
+ * increment position of generator by four.
+ *
+ * The implementation uses a Box-Muller transform to generate four
+ * normally distributed results, then transforms them to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float4 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float4 curand_log_normal4(curandStatePhilox4_32_10_t *state, float mean, float stddev)
+{
+    /* Four values produced by the four-wide Box-Muller helper. */
+    float4 normals = curand_box_muller4(state);
+    /* Scale, shift and exponentiate each component independently. */
+    float4 result;
+    result.x = expf(mean + (stddev * normals.x));
+    result.y = expf(mean + (stddev * normals.y));
+    result.z = expf(mean + (stddev * normals.z));
+    result.w = expf(mean + (stddev * normals.w));
+    return result;
+}
+/**
+ * \brief Return a log-normally distributed float from an MRG32k3a generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the MRG32k3a generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, transforms them to log-normal distribution,
+ * then returns them one at a time.
+ * See ::curand_log_normal2() for a more efficient version that returns
+ * both results at once.
+ *
+ * \param state  - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateMRG32k3a_t *state, float mean, float stddev)
+{
+    /* The flag marks a stored log-normal sample: return it and clear the flag. */
+    if(state->boxmuller_flag == EXTRA_FLAG_LOG_NORMAL) {
+        state->boxmuller_flag = 0;
+        return state->boxmuller_extra;
+    }
+    /* Nothing stored: obtain a fresh pair from the MRG Box-Muller helper. */
+    float2 normals = curand_box_muller_mrg(state);
+    /* Store the .y-derived sample for the next call, return the .x-derived one. */
+    state->boxmuller_extra = expf(mean + (stddev * normals.y));
+    state->boxmuller_flag = EXTRA_FLAG_LOG_NORMAL;
+    return expf(mean + (stddev * normals.x));
+}
+/**
+ * \brief Return two log-normally distributed floats from an MRG32k3a generator.
+ *
+ * Return two log-normally distributed floats derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the MRG32k3a generator in \p state,
+ * increment position of generator by two.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, then transforms them to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float2 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float2 curand_log_normal2(curandStateMRG32k3a_t *state, float mean, float stddev)
+{
+    /* Pair produced by the MRG Box-Muller helper. */
+    float2 normals = curand_box_muller_mrg(state);
+    /* Scale, shift and exponentiate each component independently. */
+    float2 result;
+    result.x = expf(mean + (stddev * normals.x));
+    result.y = expf(mean + (stddev * normals.y));
+    return result;
+}
+/**
+ * \brief Return a log-normally distributed float from an MTGP32 generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the MTGP32 generator in \p state,
+ * increment position of generator.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate a normally distributed result, then transforms the result
+ * to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateMtgp32_t *state, float mean, float stddev)
+{
+    /* Map one raw draw through the inverse-CDF helper, then exponentiate. */
+    float normal = _curand_normal_icdf(curand(state));
+    return expf(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed float from a Sobol32 generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Sobol32 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate a normally distributed result, then transforms the result
+ * to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateSobol32_t *state, float mean, float stddev)
+{
+    /* Map one raw draw through the inverse-CDF helper, then exponentiate. */
+    float normal = _curand_normal_icdf(curand(state));
+    return expf(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed float from a scrambled Sobol32 generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the scrambled Sobol32 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate a normally distributed result, then transforms the result
+ * to log-normal.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateScrambledSobol32_t *state, float mean, float stddev)
+{
+    /* Map one raw draw through the inverse-CDF helper, then exponentiate. */
+    float normal = _curand_normal_icdf(curand(state));
+    return expf(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed float from a Sobol64 generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Sobol64 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, then converts to log-normal
+ * distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateSobol64_t *state, float mean, float stddev)
+{
+    /* Map one raw draw through the inverse-CDF helper, then exponentiate. */
+    float normal = _curand_normal_icdf(curand(state));
+    return expf(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed float from a scrambled Sobol64 generator.
+ *
+ * Return a single log-normally distributed float derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the scrambled Sobol64 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, then converts to log-normal
+ * distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed float with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS float curand_log_normal(curandStateScrambledSobol64_t *state, float mean, float stddev)
+{
+    /* Map one raw draw through the inverse-CDF helper, then exponentiate. */
+    float normal = _curand_normal_icdf(curand(state));
+    return expf(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed double from an XORWOW generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the XORWOW generator in \p state,
+ * increment position of generator.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, transforms them to log-normal distribution,
+ * then returns them one at a time.
+ * See ::curand_log_normal2_double() for a more efficient version that returns
+ * both results at once.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateXORWOW_t *state, double mean, double stddev)
+{
+    /* The flag marks a stored log-normal sample: return it and clear the flag. */
+    if(state->boxmuller_flag_double == EXTRA_FLAG_LOG_NORMAL) {
+        state->boxmuller_flag_double = 0;
+        return state->boxmuller_extra_double;
+    }
+    /* Nothing stored: four draws, taken in this order, feed the double helper. */
+    unsigned int bits0 = curand(state);
+    unsigned int bits1 = curand(state);
+    unsigned int bits2 = curand(state);
+    unsigned int bits3 = curand(state);
+    double2 normals = _curand_box_muller_double(bits0, bits1, bits2, bits3);
+    /* Store the .y-derived sample for the next call, return the .x-derived one. */
+    state->boxmuller_extra_double = exp(mean + (stddev * normals.y));
+    state->boxmuller_flag_double = EXTRA_FLAG_LOG_NORMAL;
+    return exp(mean + (stddev * normals.x));
+}
+/**
+ * \brief Return a log-normally distributed double from a Philox4_32_10 generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Philox4_32_10 generator in \p state,
+ * increment position of generator.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, transforms them to log-normal distribution,
+ * then returns them one at a time.
+ * See ::curand_log_normal2_double() for a more efficient version that returns
+ * both results at once.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStatePhilox4_32_10_t *state, double mean, double stddev)
+{
+    /* The flag marks a stored log-normal sample: return it and clear the flag. */
+    if(state->boxmuller_flag_double == EXTRA_FLAG_LOG_NORMAL) {
+        state->boxmuller_flag_double = 0;
+        return state->boxmuller_extra_double;
+    }
+    /* Nothing stored: one four-wide draw supplies all inputs of the helper. */
+    uint4 draws = curand4(state);
+    double2 normals = _curand_box_muller_double(draws.x, draws.y, draws.z, draws.w);
+    /* Store the .y-derived sample for the next call, return the .x-derived one. */
+    state->boxmuller_extra_double = exp(mean + (stddev * normals.y));
+    state->boxmuller_flag_double = EXTRA_FLAG_LOG_NORMAL;
+    return exp(mean + (stddev * normals.x));
+}
+/**
+ * \brief Return two log-normally distributed doubles from an XORWOW generator.
+ *
+ * Return two log-normally distributed doubles derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the XORWOW generator in \p state,
+ * increment position of generator by two.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, and transforms them to log-normal distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double2 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double2 curand_log_normal2_double(curandStateXORWOW_t *state, double mean, double stddev)
+{
+    /* Pair produced by the double-precision Box-Muller helper. */
+    double2 normals = curand_box_muller_double(state);
+    /* Scale, shift and exponentiate each component independently. */
+    double2 result;
+    result.x = exp(mean + (stddev * normals.x));
+    result.y = exp(mean + (stddev * normals.y));
+    return result;
+}
+/**
+ * \brief Return two log-normally distributed doubles from a Philox4_32_10 generator.
+ *
+ * Return two log-normally distributed doubles derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Philox4_32_10 generator in \p state,
+ * increment position of generator by four.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, and transforms them to log-normal distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double2 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double2 curand_log_normal2_double(curandStatePhilox4_32_10_t *state, double mean, double stddev)
+{
+    /* Pair produced by the two-wide double-precision Box-Muller helper. */
+    double2 normals = curand_box_muller2_double(state);
+    /* Scale, shift and exponentiate each component independently. */
+    double2 result;
+    result.x = exp(mean + (stddev * normals.x));
+    result.y = exp(mean + (stddev * normals.y));
+    return result;
+}
+// not part of API
+__NV_SILENCE_DEPRECATION_BEGIN
+QUALIFIERS double4 curand_log_normal4_double(curandStatePhilox4_32_10_t *state, double mean, double stddev)
+{
+    /* Four values produced by the four-wide double-precision Box-Muller helper. */
+    double4 normals = curand_box_muller4_double(state);
+    /* Scale, shift and exponentiate each component independently. */
+    double4 result;
+    result.x = exp(mean + (stddev * normals.x));
+    result.y = exp(mean + (stddev * normals.y));
+    result.z = exp(mean + (stddev * normals.z));
+    result.w = exp(mean + (stddev * normals.w));
+    return result;
+}
+__NV_SILENCE_DEPRECATION_END
+/**
+ * \brief Return a log-normally distributed double from an MRG32k3a generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the MRG32k3a generator in \p state,
+ * increment position of generator.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, transforms them to log-normal distribution,
+ * then returns them one at a time.
+ * See ::curand_log_normal2_double() for a more efficient version that returns
+ * both results at once.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateMRG32k3a_t *state, double mean, double stddev)
+{
+    /* The flag marks a stored log-normal sample: return it and clear the flag. */
+    if(state->boxmuller_flag_double == EXTRA_FLAG_LOG_NORMAL) {
+        state->boxmuller_flag_double = 0;
+        return state->boxmuller_extra_double;
+    }
+    /* Nothing stored: obtain a fresh pair from the MRG double helper. */
+    double2 normals = curand_box_muller_mrg_double(state);
+    /* Store the .y-derived sample for the next call, return the .x-derived one. */
+    state->boxmuller_extra_double = exp(mean + (stddev * normals.y));
+    state->boxmuller_flag_double = EXTRA_FLAG_LOG_NORMAL;
+    return exp(mean + (stddev * normals.x));
+}
+/**
+ * \brief Return two log-normally distributed doubles from an MRG32k3a generator.
+ *
+ * Return two log-normally distributed doubles derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the MRG32k3a generator in \p state,
+ * increment position of generator by two.
+ *
+ * The implementation uses a Box-Muller transform to generate two
+ * normally distributed results, and transforms them to log-normal distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double2 where each element is from a
+ * distribution with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double2 curand_log_normal2_double(curandStateMRG32k3a_t *state, double mean, double stddev)
+{
+    /* Pair produced by the MRG double-precision Box-Muller helper. */
+    double2 normals = curand_box_muller_mrg_double(state);
+    /* Scale, shift and exponentiate each component independently. */
+    double2 result;
+    result.x = exp(mean + (stddev * normals.x));
+    result.y = exp(mean + (stddev * normals.y));
+    return result;
+}
+/**
+ * \brief Return a log-normally distributed double from an MTGP32 generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the MTGP32 generator in \p state,
+ * increment position of generator.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, and transforms them into
+ * log-normal distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateMtgp32_t *state, double mean, double stddev)
+{
+    /* Map one raw draw through the double inverse-CDF helper, then exponentiate. */
+    double normal = _curand_normal_icdf_double(curand(state));
+    return exp(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed double from a Sobol32 generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Sobol32 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, and transforms them into
+ * log-normal distribution.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateSobol32_t *state, double mean, double stddev)
+{
+    /* Map one raw draw through the double inverse-CDF helper, then exponentiate. */
+    double normal = _curand_normal_icdf_double(curand(state));
+    return exp(mean + (stddev * normal));
+}
+/**
+ * \brief Return a log-normally distributed double from a scrambled Sobol32 generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the scrambled Sobol32 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, and transforms them into
+ * log-normal distribution.
+ * Concretely, it returns exp(\p mean + \p stddev * z), where z is the value
+ * produced by _curand_normal_icdf_double() for one curand() draw.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double derived from a normal distribution
+ * with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateScrambledSobol32_t *state, double mean, double stddev)
+{
+    return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
+}
+/**
+ * \brief Return a log-normally distributed double from a Sobol64 generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the Sobol64 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, and transforms them into
+ * log-normal distribution.
+ * Concretely, it returns exp(\p mean + \p stddev * z), where z is the value
+ * produced by _curand_normal_icdf_double() for one curand() draw.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double derived from a normal distribution
+ * with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateSobol64_t *state, double mean, double stddev)
+{
+    return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
+}
+/**
+ * \brief Return a log-normally distributed double from a scrambled Sobol64 generator.
+ *
+ * Return a single log-normally distributed double derived from a normal
+ * distribution with mean \p mean and standard deviation \p stddev
+ * from the scrambled Sobol64 generator in \p state,
+ * increment position of generator by one.
+ *
+ * The implementation uses the inverse cumulative distribution function
+ * to generate normally distributed results, and transforms them into
+ * log-normal distribution.
+ * Concretely, it returns exp(\p mean + \p stddev * z), where z is the value
+ * produced by _curand_normal_icdf_double() for one curand() draw.
+ *
+ * \param state - Pointer to state to update
+ * \param mean   - Mean of the related normal distribution
+ * \param stddev - Standard deviation of the related normal distribution
+ *
+ * \return Log-normally distributed double derived from a normal distribution
+ * with mean \p mean and standard deviation \p stddev
+ */
+QUALIFIERS double curand_log_normal_double(curandStateScrambledSobol64_t *state, double mean, double stddev)
+{
+    return exp(mean + (stddev * _curand_normal_icdf_double(curand(state))));
+}
+#endif // !defined(CURAND_LOGNORMAL_H_)

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/curand_mtgp32_host.h ADDED Viewed

	@@ -0,0 +1,516 @@

+/*
+ * Copyright 2010-2014 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+/*
+ * curand_mtgp32_host.h
+ *
+ *
+ * MTGP32-11213
+ *
+ * Mersenne Twister RNG for the GPU
+ *
+ * The period of generated integers is 2<sup>11213</sup>-1.
+ *
+ * This code generates 32-bit unsigned integers, and
+ * single precision floating point numbers uniformly distributed
+ * in the range [1, 2). (float r; 1.0 <= r < 2.0)
+ */
+/*
+ * Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+ * University.  All rights reserved.
+ * Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
+ * University and University of Tokyo.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *     * Neither the name of the Hiroshima University nor the names of
+ *       its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written
+ *       permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#if !defined CURAND_MTGP32_HOST_H
+#define CURAND_MTGP32_HOST_H
+#if !defined(QUALIFIERS)
+#define QUALIFIERS static inline __device__
+#endif
+#include <cuda_runtime.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include "curand.h"
+#include "curand_mtgp32.h"
+#include "curand_mtgp32dc_p_11213.h"
+/**
+ * \addtogroup DEVICE Device API
+ *
+ * @{
+ */
+static const unsigned int non_zero = 0x4d544750; /* bytes 0x4d 0x54 0x47 0x50 spell "MTGP" in ASCII; written to the last state word by the init_by_* routines when that word would otherwise be 0 */
+/*
+ * Mixing step used while seeding the state array in
+ * mtgp32_init_by_array() and mtgp32_init_by_str().
+ * @param[in] x 32-bit integer
+ * @return (x XOR (x >> 27)) multiplied by 1664525, as a 32-bit integer
+ */
+static __forceinline__ unsigned int ini_func1(unsigned int x) {
+    const unsigned int folded = x ^ (x >> 27);
+    return folded * 1664525u;
+}
+/*
+ * Second mixing step used while seeding the state array in
+ * mtgp32_init_by_array() and mtgp32_init_by_str().
+ * @param[in] x 32-bit integer
+ * @return (x XOR (x >> 27)) multiplied by 1566083941, as a 32-bit integer
+ */
+static __forceinline__ unsigned int ini_func2(unsigned int x) {
+    const unsigned int folded = x ^ (x >> 27);
+    return folded * 1566083941u;
+}
+/*
+ * This function initializes the internal state array with a 32-bit
+ * integer seed. It writes para->mexp / 32 + 1 words of \b state and
+ * performs no allocation itself. \b para should be one of the elements
+ * in the parameter table (mtgp32-param-ref.c).
+ *
+ * NOTE(review): the upstream comment said the allocated memory should be
+ * freed by calling mtgp32_free(); nothing is allocated here, so that
+ * presumably refers to a buffer owned by the caller -- confirm.
+ *
+ * This function is called by the cuda program, because the cuda program
+ * uses another structure and another allocation method.
+ *
+ * @param[out] state MTGP internal status vector.
+ * @param[in] para parameter structure
+ * @param[in] seed a 32-bit integer used as the seed.
+ */
+static __forceinline__ __host__
+void mtgp32_init_state(unsigned int state[],
+                       const mtgp32_params_fast_t *para, unsigned int seed) {
+    int i;
+    int size = para->mexp / 32 + 1;
+    unsigned int hidden_seed;
+    unsigned int tmp;
+    hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
+    tmp = hidden_seed;
+    tmp += tmp >> 16; /* fold hidden_seed down so its low byte can serve as the fill byte */
+    tmp += tmp >> 8;
+    memset(state, tmp & 0xff, sizeof(unsigned int) * size); /* pre-fill every byte of the state with that byte */
+    state[0] = seed;
+    state[1] = hidden_seed;
+    for (i = 1; i < size; i++) { /* starts at 1, so state[0] keeps the raw seed */
+        state[i] ^= (1812433253) * (state[i - 1] ^ (state[i - 1] >> 30)) + i; /* XORs into the pre-filled word rather than overwriting it */
+    }
+}
+/*
+ * This function initializes the internal state array
+ * with a 32-bit integer array. \b para should be one of the elements in
+ * the parameter table (mtgp32-param-ref.c).
+ *
+ * @param[out] state MTGP internal status vector; para->mexp / 32 + 1 words are initialized.
+ * @param[in] para parameter structure
+ * @param[in] array a 32-bit integer array used as a seed.
+ * @param[in] length length of the array.
+ * @return always 0 (documented upstream as CURAND_STATUS_SUCCESS)
+ */
+static __forceinline__ __host__
+int mtgp32_init_by_array(unsigned int state[],
+                         const mtgp32_params_fast_t *para,
+                         unsigned int *array, int length) {
+    int i, j, count;
+    unsigned int r;
+    int lag;
+    int mid;
+    int size = para->mexp / 32 + 1;
+    unsigned int hidden_seed;
+    unsigned int tmp;
+    if (size >= 623) { /* lag is chosen from the state size */
+    lag = 11;
+    } else if (size >= 68) {
+    lag = 7;
+    } else if (size >= 39) {
+    lag = 5;
+    } else {
+    lag = 3;
+    }
+    mid = (size - lag) / 2;
+    hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
+    tmp = hidden_seed;
+    tmp += tmp >> 16; /* fold hidden_seed down so its low byte can serve as the fill byte */
+    tmp += tmp >> 8;
+    memset(state, tmp & 0xff, sizeof(unsigned int) * size);
+    state[0] = hidden_seed;
+    if (length + 1 > size) { /* count = max(length + 1, size) */
+    count = length + 1;
+    } else {
+    count = size;
+    }
+    r = ini_func1(state[0] ^ state[mid] ^ state[size - 1]);
+    state[mid] += r;
+    r += length;
+    state[(mid + lag) % size] += r;
+    state[0] = r;
+    i = 1; /* redundant: the loop below re-initializes i to 1 */
+    count--; /* the round for index 0 was done above */
+    for (i = 1, j = 0; (j < count) && (j < length); j++) { /* rounds that mix in one seed word each */
+    r = ini_func1(state[i] ^ state[(i + mid) % size]
+              ^ state[(i + size - 1) % size]);
+    state[(i + mid) % size] += r;
+    r += array[j] + i;
+    state[(i + mid + lag) % size] += r;
+    state[i] = r;
+    i = (i + 1) % size;
+    }
+    for (; j < count; j++) { /* remaining rounds once the seed words are used up */
+    r = ini_func1(state[i] ^ state[(i + mid) % size]
+              ^ state[(i + size - 1) % size]);
+    state[(i + mid) % size] += r;
+    r += i;
+    state[(i + mid + lag) % size] += r;
+    state[i] = r;
+    i = (i + 1) % size;
+    }
+    for (j = 0; j < size; j++) { /* final pass over every word, using ini_func2 with add/xor swapped */
+    r = ini_func2(state[i] + state[(i + mid) % size]
+              + state[(i + size - 1) % size]);
+    state[(i + mid) % size] ^= r;
+    r -= i;
+    state[(i + mid + lag) % size] ^= r;
+    state[i] = r;
+    i = (i + 1) % size;
+    }
+    if (state[size - 1] == 0) { /* never leave the last state word at zero */
+    state[size - 1] = non_zero;
+    }
+    return 0;
+}
+/*
+ * This function initializes the internal state array
+ * with a character array. \b para should be one of the elements in
+ * the parameter table (mtgp32-param-ref.c).
+ * This is the same algorithm as mtgp32_init_by_array(), with each
+ * character of the string used as one seed word.
+ *
+ * @param[out] state MTGP internal status vector; para->mexp / 32 + 1 words are initialized.
+ * @param[in] para parameter structure
+ * @param[in] array a character array used as a seed. (terminated by zero.)
+ * @return always 0; this function performs no memory allocation
+ */
+static __forceinline__ __host__
+int mtgp32_init_by_str(unsigned int state[],
+                       const mtgp32_params_fast_t *para, unsigned char *array) {
+    int i, j, count;
+    unsigned int r;
+    int lag;
+    int mid;
+    int size = para->mexp / 32 + 1;
+    int length = (unsigned int)strlen((char *)array); /* seed length is the string length, excluding the terminator */
+    unsigned int hidden_seed;
+    unsigned int tmp;
+    if (size >= 623) { /* lag is chosen from the state size */
+    lag = 11;
+    } else if (size >= 68) {
+    lag = 7;
+    } else if (size >= 39) {
+    lag = 5;
+    } else {
+    lag = 3;
+    }
+    mid = (size - lag) / 2;
+    hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
+    tmp = hidden_seed;
+    tmp += tmp >> 16; /* fold hidden_seed down so its low byte can serve as the fill byte */
+    tmp += tmp >> 8;
+    memset(state, tmp & 0xff, sizeof(unsigned int) * size);
+    state[0] = hidden_seed;
+    if (length + 1 > size) { /* count = max(length + 1, size) */
+    count = length + 1;
+    } else {
+    count = size;
+    }
+    r = ini_func1(state[0] ^ state[mid] ^ state[size - 1]);
+    state[mid] += r;
+    r += length;
+    state[(mid + lag) % size] += r;
+    state[0] = r;
+    i = 1; /* redundant: the loop below re-initializes i to 1 */
+    count--; /* the round for index 0 was done above */
+    for (i = 1, j = 0; (j < count) && (j < length); j++) { /* rounds that mix in one character each */
+    r = ini_func1(state[i] ^ state[(i + mid) % size]
+              ^ state[(i + size - 1) % size]);
+    state[(i + mid) % size] += r;
+    r += array[j] + i;
+    state[(i + mid + lag) % size] += r;
+    state[i] = r;
+    i = (i + 1) % size;
+    }
+    for (; j < count; j++) { /* remaining rounds once the characters are used up */
+    r = ini_func1(state[i] ^ state[(i + mid) % size]
+              ^ state[(i + size - 1) % size]);
+    state[(i + mid) % size] += r;
+    r += i;
+    state[(i + mid + lag) % size] += r;
+    state[i] = r;
+    i = (i + 1) % size;
+    }
+    for (j = 0; j < size; j++) { /* final pass over every word, using ini_func2 with add/xor swapped */
+    r = ini_func2(state[i] + state[(i + mid) % size]
+              + state[(i + size - 1) % size]);
+    state[(i + mid) % size] ^= r;
+    r -= i;
+    state[(i + mid + lag) % size] ^= r;
+    state[i] = r;
+    i = (i + 1) % size;
+    }
+    if (state[size - 1] == 0) { /* never leave the last state word at zero */
+    state[size - 1] = non_zero;
+    }
+    return 0;
+}
+/*
+ * Shared implementation behind curandMakeMTGP32Constants().
+ *
+ * Gathers the per-parameter-set fields of \p params (pos, sh1, sh2 and the
+ * tbl, tmp_tbl and flt_tmp_tbl tables) into contiguous host staging arrays
+ * and copies each array to the matching member of \p p with
+ * cudaMemcpy(..., cudaMemcpyHostToDevice). The mask is taken from
+ * params[0] only. Copies stop at the first cudaMemcpy failure.
+ *
+ * @param[in]  params    array of at least \p block_num parameter sets
+ * @param[out] p         destination; its pos_tbl, sh1_tbl, sh2_tbl, param_tbl,
+ *                       temper_tbl, single_temper_tbl and mask members are
+ *                       used as cudaMemcpy destinations
+ * @param[in]  block_num number of parameter sets to copy
+ * @return CURAND_STATUS_ALLOCATION_FAILED if any host staging buffer could
+ *         not be allocated, CURAND_STATUS_INITIALIZATION_FAILED if a copy
+ *         fails, CURAND_STATUS_SUCCESS otherwise
+ */
+template<typename ParamsType>
+static __forceinline__ __host__
+curandStatus_t curandMakeMTGP32ConstantsImpl(const mtgp32_params_fast_t params[], ParamsType * p, const int block_num)
+{
+    const int size1 = sizeof(unsigned int) * block_num;
+    const int size2 = sizeof(unsigned int) * block_num * TBL_SIZE;
+    unsigned int *h_pos_tbl;
+    unsigned int *h_sh1_tbl;
+    unsigned int *h_sh2_tbl;
+    unsigned int *h_param_tbl;
+    unsigned int *h_temper_tbl;
+    unsigned int *h_single_temper_tbl;
+    unsigned int *h_mask;
+    curandStatus_t status = CURAND_STATUS_SUCCESS;
+    h_pos_tbl = (unsigned int *)malloc(size1);
+    h_sh1_tbl = (unsigned int *)malloc(size1);
+    h_sh2_tbl = (unsigned int *)malloc(size1);
+    h_param_tbl = (unsigned int *)malloc(size2);
+    h_temper_tbl = (unsigned int *)malloc(size2);
+    h_single_temper_tbl = (unsigned int *)malloc(size2);
+    h_mask = (unsigned int *)malloc(sizeof(unsigned int));
+    if (h_pos_tbl == NULL
+        || h_sh1_tbl == NULL
+        || h_sh2_tbl == NULL
+        || h_param_tbl == NULL
+        || h_temper_tbl == NULL
+        || h_single_temper_tbl == NULL
+        || h_mask == NULL) {
+        /*
+         * Do not free anything here: the buffers are released exactly once
+         * at the common cleanup point below. Freeing them in this branch as
+         * well would free every successfully allocated buffer twice.
+         */
+        status = CURAND_STATUS_ALLOCATION_FAILED;
+    } else {
+        h_mask[0] = params[0].mask;
+        for (int i = 0; i < block_num; i++) {
+            h_pos_tbl[i] = params[i].pos;
+            h_sh1_tbl[i] = params[i].sh1;
+            h_sh2_tbl[i] = params[i].sh2;
+            for (int j = 0; j < TBL_SIZE; j++) {
+                h_param_tbl[i * TBL_SIZE + j] = params[i].tbl[j];
+                h_temper_tbl[i * TBL_SIZE + j] = params[i].tmp_tbl[j];
+                h_single_temper_tbl[i * TBL_SIZE + j] = params[i].flt_tmp_tbl[j];
+            }
+        }
+        if (cudaMemcpy( p->pos_tbl,
+                        h_pos_tbl, size1, cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        } else
+        if (cudaMemcpy( p->sh1_tbl,
+                        h_sh1_tbl, size1, cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        } else
+        if (cudaMemcpy( p->sh2_tbl,
+                        h_sh2_tbl, size1, cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        } else
+        if (cudaMemcpy( p->param_tbl,
+                        h_param_tbl, size2, cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        } else
+        if (cudaMemcpy( p->temper_tbl,
+                        h_temper_tbl, size2, cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        } else
+        if (cudaMemcpy( p->single_temper_tbl,
+                        h_single_temper_tbl, size2, cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        } else
+        if (cudaMemcpy( p->mask,
+                        h_mask, sizeof(unsigned int), cudaMemcpyHostToDevice) != cudaSuccess)
+        {
+            status = CURAND_STATUS_INITIALIZATION_FAILED;
+        }
+    }
+    /* Single cleanup point for every path; free(NULL) is a no-op. */
+    free(h_pos_tbl);
+    free(h_sh1_tbl);
+    free(h_sh2_tbl);
+    free(h_param_tbl);
+    free(h_temper_tbl);
+    free(h_single_temper_tbl);
+    free(h_mask);
+    return status;
+}
+/**
+ * \brief Set up constant parameters for the mtgp32 generator
+ *
+ * This host-side helper function re-organizes CURAND_NUM_MTGP32_PARAMS sets of
+ * generator parameters for use by kernel functions and copies the
+ * result to the specified location in device memory.
+ * It forwards to curandMakeMTGP32ConstantsImpl() with the number of
+ * parameter sets fixed to CURAND_NUM_MTGP32_PARAMS.
+ *
+ * \param params - Pointer to an array of type mtgp32_params_fast_t in host memory
+ * \param p - pointer to a structure of type mtgp32_kernel_params_t in device memory.
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if host memory could not be allocated
+ * - CURAND_STATUS_INITIALIZATION_FAILED if the copy to device memory failed
+ * - CURAND_STATUS_SUCCESS otherwise
+ */
+static __forceinline__ __host__
+curandStatus_t curandMakeMTGP32Constants(const mtgp32_params_fast_t params[], mtgp32_kernel_params_t * p)
+{
+    return curandMakeMTGP32ConstantsImpl(params, p, CURAND_NUM_MTGP32_PARAMS);
+}
+/**
+ * \brief Set up initial states for the mtgp32 generator
+ *
+ * Host-side helper that builds \p n generator states (one per parameter set)
+ * in a temporary host array and then copies that array to \p s with a single
+ * cudaMemcpy. State \c i is seeded with the 64-bit \p seed folded to 32 bits,
+ * plus \c i + 1, and records \c i as its parameter index and \p k as its
+ * kernel-parameter pointer.
+ *
+ * \param s - pointer to an array of states in device memory
+ * \param params - Pointer to an array of type mtgp32_params_fast_t in host memory
+ * \param k - pointer to a structure of type mtgp32_kernel_params_t in device memory
+ * \param n - number of parameter sets/states to initialize
+ * \param seed - seed value
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if host memory state could not be allocated
+ * - CURAND_STATUS_INITIALIZATION_FAILED if the copy to device memory failed
+ * - CURAND_STATUS_SUCCESS otherwise
+ */
+static __forceinline__ __host__
+curandStatus_t CURANDAPI curandMakeMTGP32KernelState(curandStateMtgp32_t *s,
+                                                     mtgp32_params_fast_t params[],
+                                                     mtgp32_kernel_params_t *k,
+                                                     int n,
+                                                     unsigned long long seed)
+{
+    curandStateMtgp32_t *host_states =
+        (curandStateMtgp32_t *) malloc(sizeof(curandStateMtgp32_t) * n);
+    if (host_states == NULL) {
+        return CURAND_STATUS_ALLOCATION_FAILED;
+    }
+
+    /* Fold the upper 32 bits of the seed into the lower 32 bits. */
+    const unsigned int folded_seed = (unsigned int)(seed ^ (seed >> 32));
+
+    for (int idx = 0; idx < n; idx++) {
+        curandStateMtgp32_t *entry = &host_states[idx];
+        mtgp32_init_state(&(entry->s[0]), &params[idx], folded_seed + idx + 1);
+        entry->offset = 0;
+        entry->pIdx = idx;
+        entry->k = k;
+    }
+
+    const cudaError_t copy_result = cudaMemcpy(s, host_states,
+                                               sizeof(curandStateMtgp32_t) * n,
+                                               cudaMemcpyHostToDevice);
+    free(host_states);
+    return (copy_result == cudaSuccess) ? CURAND_STATUS_SUCCESS
+                                        : CURAND_STATUS_INITIALIZATION_FAILED;
+}
+/** @} */
+#endif

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExt.h ADDED Viewed

	@@ -0,0 +1,1668 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+/** \file nvToolsExt.h
+ */
+/* ========================================================================= */
+/** \mainpage
+ * \tableofcontents
+ * \section INTRODUCTION Introduction
+ *
+ * The NVIDIA Tools Extension library is a set of functions that a
+ * developer can use to provide additional information to tools.
+ * The additional information is used by the tool to improve
+ * analysis and visualization of data.
+ *
+ * The library introduces close to zero overhead if no tool is
+ * attached to the application.  The overhead when a tool is
+ * attached is specific to the tool.
+ *
+ * \section INITIALIZATION Initialization
+ *
+ * Typically the tool's library that plugs into NVTX is indirectly
+ * loaded via environmental properties that are platform specific.
+ * For some platforms or special cases, the user may instead be required
+ * to initialize explicitly.  This can also
+ * be helpful to control when the API loads a tool's library instead
+ * of what would typically be the first function call to emit info.
+ * For these rare cases, see \ref INITIALIZATION for additional information.
+ *
+ * \section MARKERS_AND_RANGES Markers and Ranges
+ *
+ * Markers and ranges are used to describe events at a specific time (markers)
+ * or over a time span (ranges) during the execution of the application
+ * respectively.
+ *
+ * \subsection MARKERS Markers
+ *
+ * Markers denote specific moments in time.
+ *
+ *
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
+ * how to specify the domain.
+ *
+ * \subsection THREAD_RANGES Thread Ranges
+ *
+ * Thread ranges denote nested time ranges. Nesting is maintained per thread
+ * per domain and does not require any additional correlation mechanism. The
+ * duration of a thread range is defined by the corresponding pair of
+ * nvtxRangePush* to nvtxRangePop API calls.
+ *
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
+ * how to specify the domain.
+ *
+ * \subsection PROCESS_RANGES Process Ranges
+ *
+ * Process ranges denote a time span that can expose arbitrary concurrency, as
+ * opposed to thread ranges that only support nesting. In addition the range
+ * start event can happen on a different thread than the end marker. For the
+ * correlation of a start/end pair a unique correlation ID is used that is
+ * returned from the start API call and needs to be passed into the end API
+ * call.
+ *
+ * \subsection EVENT_ATTRIBUTES Event Attributes
+ *
+ * \ref MARKERS_AND_RANGES can be annotated with various attributes to provide
+ * additional information for an event or to guide the tool's visualization of
+ * the data. Each of the attributes is optional and if left unused the
+ * attributes fall back to a default value. The attributes include:
+ * - color
+ * - category
+ *
+ * To specify any attribute other than the text message, the \ref
+ * EVENT_ATTRIBUTE_STRUCTURE "Event Attribute Structure" must be used.
+ *
+ * \section DOMAINS Domains
+ *
+ * Domains enable developers to scope annotations. By default all events and
+ * annotations are in the default domain. Additional domains can be registered.
+ * This allows developers to scope markers, ranges, and resources names to
+ * avoid conflicts.
+ *
+ * The function ::nvtxDomainCreateA or ::nvtxDomainCreateW is used to create
+ * a named domain.
+ *
+ * Each domain maintains its own
+ * - categories
+ * - thread range stacks
+ * - registered strings
+ *
+ * The function ::nvtxDomainDestroy marks the end of the domain. Destroying
+ * a domain unregisters and destroys all objects associated with it such as
+ * registered strings, resource objects, named categories, and started ranges.
+ *
+ * \section RESOURCE_NAMING Resource Naming
+ *
+ * This section covers calls that allow annotating objects with user-provided
+ * names in order to allow for a better analysis of complex trace data. All of
+ * the functions take the handle or the ID of the object to name and the name.
+ * The functions can be called multiple times during the execution of an
+ * application, however, in that case it is implementation dependent which
+ * name will be reported by the tool.
+ *
+ * \subsection CATEGORY_NAMING Category Naming
+ *
+ * Some functions in this library support associating an integer category
+ * to enable filtering and sorting.  The category naming functions allow
+ * the application to associate a user friendly name with the integer
+ * category.  Support for domains has been added in NVTX_VERSION_2 to
+ * avoid collisions when domains are developed independently.
+ *
+ * \subsection RESOURCE_OBJECTS Resource Objects
+ *
+ * Resource objects are a generic mechanism for attaching data to an application
+ * resource.  The identifier field makes the association to a pointer or handle,
+ * while the type field helps provide deeper understanding of the identifier as
+ * well as enabling differentiation in cases where handles generated by different
+ * APIs may collide.  The resource object may also have an associated message to
+ * associate with the application resource, enabling further annotation of this
+ * object and how it is used.
+ *
+ * The resource object was introduced in NVTX_VERSION_2 to supersede existing naming
+ * functions and allow the application resource identified by those functions to be
+ * associated to a domain.  The other naming functions are still supported for backward
+ * compatibility but will be associated only to the default domain.
+ *
+ * \subsection RESOURCE_NAMING_OS Resource Naming
+ *
+ * Some operating system resource creation APIs do not support providing a user friendly
+ * name, such as some OS thread creation APIs.  This API supports resource naming
+ * both through resource objects and through functions following the pattern
+ * nvtxName[RESOURCE_TYPE][A|W](identifier, name).  Resource objects introduced in NVTX_VERSION 2
+ * supersede the other functions with a more general method of assigning names to OS resources,
+ * along with associating them to domains too.  The older nvtxName* functions are only associated
+ * with the default domain.
+ * \section EXTENSIONS Optional Extensions
+ * Optional extensions will either appear within the existing sections they extend or appear
+ * in the "Related Pages" when they introduce new concepts.
+ */
+ /**
+ * Tools Extension API version
+ */
+#if defined(NVTX_VERSION) && NVTX_VERSION < 3
+#error "Trying to #include NVTX version 3 in a source file where an older NVTX version has already been included.  If you are not directly using NVTX (the NVIDIA Tools Extension library), you are getting this error because libraries you are using have included different versions of NVTX.  Suggested solutions are: (1) reorder #includes so the newest NVTX version is included first, (2) avoid using the conflicting libraries in the same .c/.cpp file, or (3) update the library using the older NVTX version to use the newer version instead."
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+/* Header guard */
+#if !defined(NVTX_VERSION)
+#define NVTX_VERSION 3
+/* Platform-dependent defines:
+ *
+ * - NVTX_API - Calling conventions (only used on Windows, and only affects
+ *   32-bit x86 builds, i.e. callee pops stack instead of caller)
+ *
+ * - NVTX_DYNAMIC_EXPORT - Make function an exported entry point from a
+ *   dynamic library or shared object.
+ *
+ * - NVTX_EXPORT_UNMANGLED_FUNCTION_NAME - When used inside the body of a
+ *   function declared with NVTX_DYNAMIC_EXPORT, ensures the symbol exported
+ *   for the function is the exact string of the function's name as written
+ *   in the code.  Name-mangling or name-decoration is disabled.  Note that
+ *   on many platforms this is not necessary, since either the function name
+ *   is already exported verbatim, or the dynamic loader also checks for
+ *   functions with the mangling applied.  Forcing the exports to avoid any
+ *   mangling simplifies usage across platforms and from other languages.
+ */
+#if defined(_WIN32)
+#define NVTX_API __stdcall
+#if defined(_MSC_VER)
+#define NVTX_DYNAMIC_EXPORT __declspec(dllexport)
+#else
+#define NVTX_DYNAMIC_EXPORT __attribute__((visibility("default"))) __declspec(dllexport)
+#endif
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_ARM64EC))
+#define NVTX_EXPORT_UNMANGLED_FUNCTION_NAME _Pragma("comment(linker, \"/EXPORT:\" __FUNCTION__ \"=\" __FUNCDNAME__)")
+#else
+#define NVTX_EXPORT_UNMANGLED_FUNCTION_NAME
+#endif
+#else /* POSIX-like platform */
+#define NVTX_API
+#define NVTX_DYNAMIC_EXPORT __attribute__((visibility("default")))
+#define NVTX_EXPORT_UNMANGLED_FUNCTION_NAME
+#endif /* Platform-dependent defines */
+/* Compiler-dependent defines:
+ *
+ * - NVTX_INLINE_STATIC - Ensure function has internal linkage, and suggest
+ *   avoiding code-gen of the function.  Without this, function has external
+ *   linkage with a strong symbol, so linker expects only one definition.
+ */
+#if defined(_MSC_VER)
+#define NVTX_INLINE_STATIC __inline static
+#else /* GCC-like compiler */
+#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
+#define NVTX_INLINE_STATIC inline static
+#else
+#define NVTX_INLINE_STATIC __inline__ static
+#endif
+#endif /* Compiler-dependent defines */
+#if !defined(NVTX_NULLPTR)
+#if defined(__cplusplus) && __cplusplus >= 201103L
+#define NVTX_NULLPTR nullptr
+#else
+#define NVTX_NULLPTR NULL
+#endif
+#endif
+#if defined(__cplusplus)
+#define NVTX_STATIC_CAST(type, value)      (static_cast<type>(value))
+#define NVTX_REINTERPRET_CAST(type, value) (reinterpret_cast<type>(value))
+#else
+#define NVTX_STATIC_CAST(type, value)      ((type)(value))
+#define NVTX_REINTERPRET_CAST(type, value) ((type)(value))
+#endif
+/* API linkage/export options:
+ *
+ * - By default, the NVTX API functions are declared as "inline", with the
+ *   implementations provided in the headers.  This allows multiple .c/.cpp
+ *   files in the same project to include NVTX headers without duplicate-
+ *   definition linker errors.  An optimizing compiler should inline these
+ *   implementations, ensuring that the overhead of making an NVTX call is as
+ *   low as possible, even without enabling link-time optimizations.
+ *
+ * - NVTX_NO_IMPL - Use when writing NVTX tools.  If this macro is defined,
+ *   the NVTX headers will provide all the typedefs, macros, and declarations
+ *   of API functions (not marked inline), but no function implementations.
+ *
+ * - NVTX_EXPORT_API - NVTX is normally used in C/C++ applications by simply
+ *   including the headers.  There is no need to link with a static library,
+ *   or to ship a dynamic library with the application (this was changed in
+ *   NVTX v3).  For other languages, it's not convenient to use a header-only
+ *   C library.  The best way to provide an idiomatic NVTX API for another
+ *   language is a .c file that includes the NVTX headers and implements
+ *   functions for that language using its native calling conventions and
+ *   datatypes -- this method can allow static linking to avoid depending on
+ *   a separate dynamic library.  Alternatively, other languages may support
+ *   using C calling conventions to directly call C functions exported from a
+ *   dynamic library.  To build such a library, write a .c file that defines
+ *   NVTX_EXPORT_API and includes any/all of the NVTX headers.  Compile this
+ *   file as a dynamic library, and the NVTX API functions from the included
+ *   headers will be exported with no name-mangling or decoration.  Defining
+ *   ABI-compatible NVTX struct and enum types in the other language is the
+ *   responsibility of the user of this dynamic library.
+ *
+ * Whichever of the above modes is chosen, the following macros are defined
+ * appropriately below to implement that mode.  These macros are only defined
+ * if not already defined by the user, so they may be overridden by users to
+ * handle advanced cases.
+ *
+ * - NVTX_DECLSPEC - Specify linkage for NVTX API functions.
+ *
+ * - NVTX_SET_NAME_MANGLING_OPTIONS - If necessary for the platform, will use
+ *   platform-dependent syntax for ensuring function name is exported with no
+ *   name-mangling or decoration.  Certain compiler and calling-convention
+ *   combinations will add name-mangling or decorations when exporting NVTX
+ *   function name symbols, which makes it much harder for other languages
+ *   to access these functions.  This macro must be used inside a function's
+ *   body because it uses built-in macros to get the current function's name.
+ */
+#if defined(NVTX_NO_IMPL)
+/* When omitting implementation, avoid declaring functions inline
+ * without definitions, since this causes compiler warnings. */
+#if !defined(NVTX_DECLSPEC)
+#define NVTX_DECLSPEC
+#endif /* !NVTX_DECLSPEC */
+#if !defined(NVTX_SET_NAME_MANGLING_OPTIONS)
+#define NVTX_SET_NAME_MANGLING_OPTIONS
+#endif /* !NVTX_SET_NAME_MANGLING_OPTIONS */
+#elif defined(NVTX_EXPORT_API)
+/* Add platform-dependent declaration syntax to ensure NVTX API functions are
+ * exported when compiling as a dynamic library/shared object, and ensure the
+ * exported names are not mangled/decorated. */
+#if !defined(NVTX_DECLSPEC)
+#define NVTX_DECLSPEC NVTX_DYNAMIC_EXPORT
+#endif /* !NVTX_DECLSPEC */
+#if !defined(NVTX_SET_NAME_MANGLING_OPTIONS)
+#define NVTX_SET_NAME_MANGLING_OPTIONS NVTX_EXPORT_UNMANGLED_FUNCTION_NAME
+#endif /* !NVTX_SET_NAME_MANGLING_OPTIONS */
+#else /* Normal NVTX usage */
+/* Functions definitions are provided, and functions are declared inline to
+ * avoid duplicate-definition linker errors when using multiple source files. */
+#if !defined(NVTX_DECLSPEC)
+#define NVTX_DECLSPEC NVTX_INLINE_STATIC
+#endif /* !NVTX_DECLSPEC */
+#if !defined(NVTX_SET_NAME_MANGLING_OPTIONS)
+#define NVTX_SET_NAME_MANGLING_OPTIONS
+#endif /* !NVTX_SET_NAME_MANGLING_OPTIONS */
+#endif /* NVTX_NO_IMPL / NVTX_EXPORT_API / normal usage */
+/* Platform-dependent helpers for defining global variables in header files.
+ * Ensures the linker uses only one instance when multiple source files include
+ * the headers, avoiding duplicate-definition linker errors. */
+#include "nvtxDetail/nvtxLinkOnce.h"
+/* Macros for applying major-version-specific suffix to NVTX global symbols, so
+ * usage of different versions in different source files is supported without
+ * violating the one-definition rule. */
+#define NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) NAME##_v##VERSION
+#define NVTX_VERSIONED_IDENTIFIER_L2(NAME, VERSION) NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION)
+#define NVTX_VERSIONED_IDENTIFIER(NAME) NVTX_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION)
+/**
+ * The NVTX library depends on stdint.h.  If the build tool chain in use
+ * does not include stdint.h, then define NVTX_STDINT_TYPES_ALREADY_DEFINED
+ * and define the following types:
+ * <ul>
+ *   <li>uint8_t
+ *   <li>int8_t
+ *   <li>uint16_t
+ *   <li>int16_t
+ *   <li>uint32_t
+ *   <li>int32_t
+ *   <li>uint64_t
+ *   <li>int64_t
+ *   <li>uintptr_t
+ *   <li>intptr_t
+ * </ul>
+ * Be sure to define NVTX_STDINT_TYPES_ALREADY_DEFINED if you are using your
+ * own definitions instead of stdint.h.
+ */
+#ifndef NVTX_STDINT_TYPES_ALREADY_DEFINED
+#include <stdint.h>
+#endif
+#include <stddef.h>
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/**
+* Result Codes used for the NVTX tool loader.
+*/
+#define NVTX_SUCCESS 0
+#define NVTX_FAIL 1
+#define NVTX_ERR_INIT_LOAD_PROPERTY 2
+#define NVTX_ERR_INIT_ACCESS_LIBRARY 3
+#define NVTX_ERR_INIT_LOAD_LIBRARY 4
+#define NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT 5
+#define NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT 6
+#define NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE 7
+/**
+ * Size of the nvtxEventAttributes_t structure.
+ */
+#define NVTX_EVENT_ATTRIB_STRUCT_SIZE (NVTX_STATIC_CAST(uint16_t, sizeof(nvtxEventAttributes_t)))
+#define NVTX_NO_PUSH_POP_TRACKING (NVTX_STATIC_CAST(int, -2))
+typedef uint64_t nvtxRangeId_t;
+/* Forward declaration of opaque domain registration structure */
+struct nvtxDomainRegistration_st;
+typedef struct nvtxDomainRegistration_st nvtxDomainRegistration;
+/** \brief Domain Handle Structure.
+* \anchor DOMAIN_HANDLE_STRUCTURE
+*
+* This structure is opaque to the user and is used as a handle to reference
+* a domain.  This type is returned from tools when using the NVTX API to
+* create a domain.
+*
+*/
+typedef nvtxDomainRegistration* nvtxDomainHandle_t;
+/* Forward declaration of opaque string registration structure */
+struct nvtxStringRegistration_st;
+typedef struct nvtxStringRegistration_st nvtxStringRegistration;
+/** \brief Registered String Handle Structure.
+* \anchor REGISTERED_STRING_HANDLE_STRUCTURE
+*
+* This structure is opaque to the user and is used as a handle to reference
+* a registered string.  This type is returned from tools when using the NVTX
+* API to create a registered string.
+*
+*/
+typedef nvtxStringRegistration* nvtxStringHandle_t;
+/* ========================================================================= */
+/** \defgroup GENERAL General
+ * @{
+ */
+/** ---------------------------------------------------------------------------
+ * Color Types
+ * ------------------------------------------------------------------------- */
+typedef enum nvtxColorType_t
+{
+    NVTX_COLOR_UNKNOWN  = 0,                 /**< Color attribute is unused. */
+    NVTX_COLOR_ARGB     = 1                  /**< An ARGB color is provided. */
+} nvtxColorType_t;
+/** ---------------------------------------------------------------------------
+ * Message Types
+ * ------------------------------------------------------------------------- */
+typedef enum nvtxMessageType_t
+{
+    NVTX_MESSAGE_UNKNOWN          = 0,    /**< Message attribute is unused. */
+    NVTX_MESSAGE_TYPE_ASCII       = 1,    /**< A character sequence is used as payload. */
+    NVTX_MESSAGE_TYPE_UNICODE     = 2,     /**< A wide character sequence is used as payload. */
+    /* NVTX_VERSION_2 */
+    NVTX_MESSAGE_TYPE_REGISTERED  = 3     /**< A unique string handle that was registered
+                                                with \ref nvtxDomainRegisterStringA() or
+                                                \ref nvtxDomainRegisterStringW(). */
+} nvtxMessageType_t;
+typedef union nvtxMessageValue_t
+{
+    const char* ascii; /**< Character sequence; corresponds to NVTX_MESSAGE_TYPE_ASCII. */
+    const wchar_t* unicode; /**< Wide character sequence; corresponds to NVTX_MESSAGE_TYPE_UNICODE. */
+    /* NVTX_VERSION_2 */
+    nvtxStringHandle_t registered; /**< Registered string handle; corresponds to NVTX_MESSAGE_TYPE_REGISTERED. */
+} nvtxMessageValue_t;
+/* ------------------------------------------------------------------------- */
+/** \brief Force initialization (optional)
+ * \anchor FORCE_INITIALIZATION
+*
+* Force NVTX library to initialize.  The first call to any NVTX API function
+* will automatically initialize the entire API.  This can make the first call
+* much slower than subsequent calls.  In applications where the first call to
+* NVTX may be in a performance-critical section, calling nvtxInitialize before
+* any performance-critical sections will ensure NVTX initialization occurs at
+* an acceptable time.  Since nvtxInitialize takes no parameters and has no
+* expected behavior besides initialization, it is convenient to add a call to
+* nvtxInitialize in NVTX-instrumented applications that need to force earlier
+* initialization without changing any other code.  For example, if an app's
+* first NVTX call is nvtxDomainCreate, and it is difficult to move that call
+* earlier because the domain handle must be stored in an object only created
+* at that point, adding a call to nvtxInitialize at the top of main() will
+* ensure the later call to nvtxDomainCreate is as fast as possible.
+*
+* \version NVTX_VERSION_3
+*
+* \param reserved - must be zero or NULL.
+*
+* @{ */
+NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved);
+/** @} */
+/** @} */ /*END defgroup*/
+/* ========================================================================= */
+/** \defgroup EVENT_ATTRIBUTES Event Attributes
+* @{
+*/
+/** ---------------------------------------------------------------------------
+* Payload Types
+* ------------------------------------------------------------------------- */
+typedef enum nvtxPayloadType_t
+{
+    NVTX_PAYLOAD_UNKNOWN = 0,   /**< Payload attribute is unused. */
+    NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 = 1,   /**< A 64 bit unsigned integer value is used as payload. */
+    NVTX_PAYLOAD_TYPE_INT64 = 2,   /**< A 64 bit signed integer value is used as payload. */
+    NVTX_PAYLOAD_TYPE_DOUBLE = 3,   /**< A 64 bit floating point value is used as payload. */
+    /* NVTX_VERSION_2 */
+    NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 = 4,   /**< A 32 bit unsigned integer value is used as payload. */
+    NVTX_PAYLOAD_TYPE_INT32 = 5,   /**< A 32 bit signed integer value is used as payload. */
+    NVTX_PAYLOAD_TYPE_FLOAT = 6    /**< A 32 bit floating point value is used as payload. */
+} nvtxPayloadType_t;
+/** \brief Event Attribute Structure.
+ * \anchor EVENT_ATTRIBUTE_STRUCTURE
+ *
+ * This structure is used to describe the attributes of an event. The layout of
+ * the structure is defined by a specific version of the tools extension
+ * library and can change between different versions of the Tools Extension
+ * library.
+ *
+ * \par Guidelines
+ * The caller should always perform the following three tasks when using
+ * attributes:
+ * <ul>
+ *    <li>Zero the structure
+ *    <li>Set the version field
+ *    <li>Set the size field
+ * </ul>
+ *
+ * Zeroing the structure sets all the event attributes types and values
+ * to the default value.
+ *
+ * The version and size field are used by the Tools Extension
+ * implementation to handle multiple versions of the attributes structure.
+ *
+ * It is recommended that the caller use one of the following two methods
+ * to initialize the event attributes structure:
+ *
+ * \par Method 1
+ * Initializing nvtxEventAttributes for future compatibility:
+ * \code
+ * nvtxEventAttributes_t eventAttrib = {0};
+ * eventAttrib.version = NVTX_VERSION;
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+ * \endcode
+ *
+ * \par Method 2
+ * Initializing nvtxEventAttributes for a specific version:
+ * \code
+ * nvtxEventAttributes_t eventAttrib = {0};
+ * eventAttrib.version = 1;
+ * eventAttrib.size = (uint16_t)(sizeof(nvtxEventAttributes_v1));
+ * \endcode
+ *
+ * If the caller uses Method 1 it is critical that the entire binary
+ * layout of the structure be configured to 0 so that all fields
+ * are initialized to the default value.
+ *
+ * The caller should either use both NVTX_VERSION and
+ * NVTX_EVENT_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
+ * and a versioned type (Method 2).  Using a mix of the two methods
+ * will likely cause either source level incompatibility or binary
+ * incompatibility in the future.
+ *
+ * \par Example
+ * Populate an attributes structure:
+ * \code
+ * // Initialize
+ * nvtxEventAttributes_t eventAttrib = {0};
+ * eventAttrib.version = NVTX_VERSION;
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+ *
+ * // Configure the Attributes
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
+ * eventAttrib.color = 0xFF880000;
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+ * eventAttrib.message.ascii = "Example";
+ * \endcode
+ *
+ * In the example the caller does not have to set the value of
+ * \ref ::nvtxEventAttributes_v2::category or
+ * \ref ::nvtxEventAttributes_v2::payload as these fields were set to
+ * the default value by {0}.
+ * \sa
+ * ::nvtxDomainMarkEx
+ * ::nvtxDomainRangeStartEx
+ * ::nvtxDomainRangePushEx
+ */
+typedef struct nvtxEventAttributes_v2
+{
+    /**
+    * \brief Version flag of the structure.
+    *
+    * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
+    * supported in this header file. This can optionally be overridden to
+    * another version of the tools extension library.
+    */
+    uint16_t version;
+    /**
+    * \brief Size of the structure.
+    *
+    * Needs to be set to the size in bytes of the event attribute
+    * structure used to specify the event.
+    */
+    uint16_t size;
+    /**
+     * \brief ID of the category the event is assigned to.
+     *
+     * A category is a user-controlled ID that can be used to group
+     * events.  The tool may use category IDs to improve filtering or
+     * enable grouping of events in the same category. The functions
+     * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
+     * to name a category.
+     *
+     * Default Value is 0
+     */
+    uint32_t category;
+    /** \brief Color type specified in this attribute structure.
+     *
+     * Defines the color format of the attribute structure's \ref COLOR_FIELD
+     * "color" field.
+     *
+     * Default Value is NVTX_COLOR_UNKNOWN
+     */
+    int32_t colorType;              /* nvtxColorType_t */
+    /** \brief Color assigned to this event. \anchor COLOR_FIELD
+     *
+     * The color that the tool should use to visualize the event.
+     */
+    uint32_t color;
+    /**
+     * \brief Payload type specified in this attribute structure.
+     *
+     * Defines the payload format of the attribute structure's \ref PAYLOAD_FIELD
+     * "payload" field.
+     *
+     * Default Value is NVTX_PAYLOAD_UNKNOWN
+     */
+    int32_t payloadType;            /* nvtxPayloadType_t */
+    int32_t reserved0; /* Explicit 4-byte filler: the fields above occupy 20 bytes, so this places the 8-byte payload union at offset 24.  No use is shown in this header section; presumably must stay 0 (zeroing the structure does this) -- TODO confirm. */
+    /**
+     * \brief Payload assigned to this event. \anchor PAYLOAD_FIELD
+     *
+     * A numerical value that can be used to annotate an event. The tool could
+     * use the payload data to reconstruct graphs and diagrams.
+     */
+    union payload_t
+    {
+        uint64_t ullValue; /* NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 */
+        int64_t llValue; /* NVTX_PAYLOAD_TYPE_INT64 */
+        double dValue; /* NVTX_PAYLOAD_TYPE_DOUBLE */
+        /* NVTX_VERSION_2 */
+        uint32_t uiValue; /* NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 */
+        int32_t iValue; /* NVTX_PAYLOAD_TYPE_INT32 */
+        float fValue; /* NVTX_PAYLOAD_TYPE_FLOAT */
+    } payload;
+    /** \brief Message type specified in this attribute structure.
+     *
+     * Defines the message format of the attribute structure's \ref MESSAGE_FIELD
+     * "message" field.
+     *
+     * Default Value is NVTX_MESSAGE_UNKNOWN
+     */
+    int32_t messageType;            /* nvtxMessageType_t */
+    /** \brief Message assigned to this attribute structure. \anchor MESSAGE_FIELD
+     *
+     * The text message that is attached to an event.
+     */
+    nvtxMessageValue_t message;
+} nvtxEventAttributes_v2;
+typedef struct nvtxEventAttributes_v2 nvtxEventAttributes_t;
+/** @} */ /*END defgroup*/
+/* ========================================================================= */
+/** \defgroup MARKERS_AND_RANGES Markers and Ranges
+ *
+ * See \ref MARKERS_AND_RANGES for more details
+ *
+ * @{
+ */
+/** \name Marker */
+/* ------------------------------------------------------------------------- */
+/** \brief Marks an instantaneous event in the application.
+*
+* A marker can contain a text message or specify additional information
+* using the event attributes structure.  These attributes include a text
+* message, color, category, and a payload. Each of the attributes is optional
+* and can only be sent out using the \ref nvtxDomainMarkEx function.
+*
+* nvtxDomainMarkEx(NULL, event) is equivalent to calling
+* nvtxMarkEx(event).
+*
+* \param domain    - The domain of scoping the category.
+* \param eventAttrib - The event attribute structure defining the marker's
+* attribute types and attribute values.
+*
+* \sa
+* ::nvtxMarkEx
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Marks an instantaneous event in the application.
+ *
+ * A marker can contain a text message or specify additional information
+ * using the event attributes structure.  These attributes include a text
+ * message, color, category, and a payload. Each of the attributes is optional
+ * and can only be sent out using the \ref nvtxMarkEx function.
+ * If \ref nvtxMarkA or \ref nvtxMarkW are used to specify the marker
+ * or if an attribute is unspecified then a default value will be used.
+ *
+ * \param eventAttrib - The event attribute structure defining the marker's
+ * attribute types and attribute values.
+ *
+ * \par Example
+ * Place a mark with attributes:
+ * \code
+ * // zero the structure
+ * nvtxEventAttributes_t eventAttrib = {0};
+ * // set the version and the size information
+ * eventAttrib.version = NVTX_VERSION;
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+ * // configure the attributes.  0 is the default for all attributes.
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
+ * eventAttrib.color = 0xFF880000;
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+ * eventAttrib.message.ascii = "Example nvtxMarkEx";
+ * nvtxMarkEx(&eventAttrib);
+ * \endcode
+ *
+ * \sa
+ * ::nvtxDomainMarkEx
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Marks an instantaneous event in the application.
+ *
+ * A marker created using \ref nvtxMarkA or \ref nvtxMarkW contains only a
+ * text message.
+ *
+ * \param message     - The message associated to this marker event.
+ *
+ * \par Example
+ * Place a mark:
+ * \code
+ * nvtxMarkA("Example nvtxMarkA");
+ * nvtxMarkW(L"Example nvtxMarkW");
+ * \endcode
+ *
+ * \sa
+ * ::nvtxDomainMarkEx
+ * ::nvtxMarkEx
+ *
+ * \version NVTX_VERSION_0
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message);
+NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message);
+/** @} */
+/** \name Process Ranges */
+/* ------------------------------------------------------------------------- */
+/** \brief Starts a process range in a domain.
+*
+* \param domain    - The domain of scoping the category.
+* \param eventAttrib - The event attribute structure defining the range's
+* attribute types and attribute values.
+*
+* \return The unique ID used to correlate a pair of Start and End events.
+*
+* \remarks Ranges defined by Start/End can overlap.
+*
+* \par Example
+* Start a range with attributes for a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
+* nvtxEventAttributes_t eventAttrib = {0};
+* eventAttrib.version = NVTX_VERSION;
+* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* eventAttrib.message.ascii = "my range";
+* nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
+* // ...
+* nvtxDomainRangeEnd(domain, rangeId);
+* \endcode
+*
+* \sa
+* ::nvtxDomainRangeEnd
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Starts a process range.
+ *
+ * \param eventAttrib - The event attribute structure defining the range's
+ * attribute types and attribute values.
+ *
+ * \return The unique ID used to correlate a pair of Start and End events.
+ *
+ * \remarks Ranges defined by Start/End can overlap.
+ *
+ * \par Example
+ * Start a range with attributes:
+ * \code
+ * nvtxEventAttributes_t eventAttrib = {0};
+ * eventAttrib.version = NVTX_VERSION;
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+ * eventAttrib.category = 3;
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
+ * eventAttrib.color = 0xFF0088FF;
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+ * eventAttrib.message.ascii = "Example Range";
+ * nvtxRangeId_t rangeId = nvtxRangeStartEx(&eventAttrib);
+ * // ...
+ * nvtxRangeEnd(rangeId);
+ * \endcode
+ *
+ * \sa
+ * ::nvtxRangeEnd
+ * ::nvtxDomainRangeStartEx
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Starts a process range.
+ *
+ * \param message     - The event message associated to this range event.
+ *
+ * \return The unique ID used to correlate a pair of Start and End events.
+ *
+ * \remarks Ranges defined by Start/End can overlap.
+ *
+ * \par Example
+ * Start a range:
+ * \code
+ * nvtxRangeId_t r1 = nvtxRangeStartA("Range 1");
+ * nvtxRangeId_t r2 = nvtxRangeStartW(L"Range 2");
+ * nvtxRangeEnd(r1);
+ * nvtxRangeEnd(r2);
+ * \endcode
+ *
+ * \sa
+ * ::nvtxRangeEnd
+ * ::nvtxRangeStartEx
+ * ::nvtxDomainRangeStartEx
+ *
+ * \version NVTX_VERSION_0
+ * @{ */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message);
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Ends a process range.
+*
+* \param domain - The domain
+* \param id - The correlation ID returned from a nvtxRangeStart call.
+*
+* \remarks This function is offered for completeness but is an alias for ::nvtxRangeEnd.
+* It does not need a domain param since that is associated with the range ID at ::nvtxDomainRangeStartEx.
+*
+* \par Example
+* End a range for a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
+* nvtxEventAttributes_t eventAttrib = {0};
+* eventAttrib.version = NVTX_VERSION;
+* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* eventAttrib.message.ascii = "my range";
+* nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
+* // ...
+* nvtxDomainRangeEnd(domain, rangeId);
+* \endcode
+*
+* \sa
+* ::nvtxDomainRangeStartEx
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Ends a process range.
+ *
+ * \param id - The correlation ID returned from an nvtxRangeStart call.
+ *
+ * \sa
+ * ::nvtxDomainRangeStartEx
+ * ::nvtxRangeStartEx
+ * ::nvtxRangeStartA
+ * ::nvtxRangeStartW
+ *
+ * \version NVTX_VERSION_0
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id);
+/** @} */
+/** \name Thread Ranges */
+/* ------------------------------------------------------------------------- */
+/** \brief Starts a nested thread range.
+*
+* \param domain    - The domain of scoping.
+* \param eventAttrib - The event attribute structure defining the range's
+* attribute types and attribute values.
+*
+* \return The 0 based level of range being started. This value is scoped to the domain.
+* If an error occurs, a negative value is returned.
+*
+* \par Example
+* Push a range with attributes for a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
+* nvtxEventAttributes_t eventAttrib = {0};
+* eventAttrib.version = NVTX_VERSION;
+* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib.colorType = NVTX_COLOR_ARGB;
+* eventAttrib.color = 0xFFFF0000;
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* eventAttrib.message.ascii = "Level 0";
+* nvtxDomainRangePushEx(domain, &eventAttrib);
+*
+* // Re-use eventAttrib
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
+* eventAttrib.message.unicode = L"Level 1";
+* nvtxDomainRangePushEx(domain, &eventAttrib);
+*
+* nvtxDomainRangePop(domain); // Level 1
+* nvtxDomainRangePop(domain); // Level 0
+* \endcode
+*
+* \sa
+* ::nvtxDomainRangePop
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Starts a nested thread range.
+ *
+ * \param eventAttrib - The event attribute structure defining the range's
+ * attribute types and attribute values.
+ *
+ * \return The 0 based level of range being started. This level is per domain.
+ * If an error occurs a negative value is returned.
+ *
+ * \par Example
+ * Push a range with attributes:
+ * \code
+ * nvtxEventAttributes_t eventAttrib = {0};
+ * eventAttrib.version = NVTX_VERSION;
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
+ * eventAttrib.color = 0xFFFF0000;
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+ * eventAttrib.message.ascii = "Level 0";
+ * nvtxRangePushEx(&eventAttrib);
+ *
+ * // Re-use eventAttrib
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
+ * eventAttrib.message.unicode = L"Level 1";
+ * nvtxRangePushEx(&eventAttrib);
+ *
+ * nvtxRangePop(); // Level 1
+ * nvtxRangePop(); // Level 0
+ * \endcode
+ *
+ * \sa
+ * ::nvtxDomainRangePushEx
+ * ::nvtxRangePop
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Starts a nested thread range.
+ *
+ * \param message     - The event message associated to this range event.
+ *
+ * \return The 0 based level of range being started.  If an error occurs a
+ * negative value is returned.
+ *
+ * \par Example
+ * Push a range:
+ * \code
+ * nvtxRangePushA("Level 0");
+ * nvtxRangePushW(L"Level 1");
+ * nvtxRangePop(); // Level 1
+ * nvtxRangePop(); // Level 0
+ * \endcode
+ *
+ * \sa
+ * ::nvtxDomainRangePushEx
+ * ::nvtxRangePop
+ *
+ * \version NVTX_VERSION_0
+ * @{ */
+NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message);
+NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Ends a nested thread range.
+*
+* \return The level of the range being ended on the current thread. If an
+* error occurs, a negative value is returned.
+*
+* \par Example
+* Pop a range for a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
+* nvtxEventAttributes_t eventAttrib = {0};
+* eventAttrib.version = NVTX_VERSION;
+* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib.colorType = NVTX_COLOR_ARGB;
+* eventAttrib.color = 0xFFFF0000;
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* eventAttrib.message.ascii = "Level 0";
+* nvtxDomainRangePushEx(domain, &eventAttrib);
+*
+* // Re-use eventAttrib
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
+* eventAttrib.message.unicode = L"Level 1";
+* nvtxDomainRangePushEx(domain, &eventAttrib);
+*
+* nvtxDomainRangePop(domain); // Level 1
+* nvtxDomainRangePop(domain); // Level 0
+* \endcode
+*
+* \sa
+* ::nvtxRangePushEx
+* ::nvtxRangePushA
+* ::nvtxRangePushW
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Ends a nested thread range.
+ *
+ * \return The level of the range being ended on the current thread. If an
+ * error occurs, a negative value is returned.
+ *
+ * \par Example
+ * Pop a range:
+ * \code
+ * nvtxRangePushA("Level 0");
+ * nvtxRangePushW(L"Level 1");
+ * nvtxRangePop(); // Level 1
+ * nvtxRangePop(); // Level 0
+ * \endcode
+ *
+ * \sa
+ * ::nvtxRangePushEx
+ * ::nvtxRangePushA
+ * ::nvtxRangePushW
+ *
+ * \version NVTX_VERSION_0
+ * @{ */
+NVTX_DECLSPEC int NVTX_API nvtxRangePop(void);
+/** @} */
+/** @} */ /*END defgroup*/
+/* ========================================================================= */
+/** \defgroup RESOURCE_NAMING Resource Naming
+ *
+ * See \ref RESOURCE_NAMING for more details
+ *
+ * @{
+ */
+/*  ------------------------------------------------------------------------- */
+/** \name Functions for Generic Resource Naming*/
+/*  ------------------------------------------------------------------------- */
+/*  ------------------------------------------------------------------------- */
+/** \cond SHOW_HIDDEN
+* \brief Resource typing helpers.
+*
+* Classes are used to make it easy to create a series of resource types
+* per API without collisions
+*/
+#define NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) (((NVTX_STATIC_CAST(uint32_t, NVTX_RESOURCE_CLASS_ ## CLASS))<<16)|(NVTX_STATIC_CAST(uint32_t, INDEX)))
+#define NVTX_RESOURCE_CLASS_GENERIC 1
+/** \endcond */
+/* ------------------------------------------------------------------------- */
+/** \brief Generic resource type for when a resource class is not available.
+*
+* \sa
+* ::nvtxDomainResourceCreate
+*
+* \version NVTX_VERSION_2
+*/
+typedef enum nvtxResourceGenericType_t
+{
+    NVTX_RESOURCE_TYPE_UNKNOWN = 0,
+    NVTX_RESOURCE_TYPE_GENERIC_POINTER = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 1), /**< Generic pointer assumed to have no collisions with other pointers. */
+    NVTX_RESOURCE_TYPE_GENERIC_HANDLE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 2), /**< Generic handle assumed to have no collisions with other handles. */
+    NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 3), /**< OS native thread identifier. */
+    NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 4) /**< POSIX pthread identifier. */
+} nvtxResourceGenericType_t;
+/** \brief Resource Attribute Structure.
+* \anchor RESOURCE_ATTRIBUTE_STRUCTURE
+*
+* This structure is used to describe the attributes of a resource. The layout of
+* the structure is defined by a specific version of the tools extension
+* library and can change between different versions of the Tools Extension
+* library.
+*
+* \par Guidelines
+* The caller should always perform the following three tasks when using
+* attributes:
+* <ul>
+*    <li>Zero the structure
+*    <li>Set the version field
+*    <li>Set the size field
+* </ul>
+*
+* Zeroing the structure sets all the resource attributes types and values
+* to the default value.
+*
+* The version and size field are used by the Tools Extension
+* implementation to handle multiple versions of the attributes structure.
+*
+* It is recommended that the caller use one of the following two methods
+* to initialize the resource attributes structure:
+*
+* \par Method 1
+* Initializing nvtxResourceAttributes_t for future compatibility:
+* \code
+* nvtxResourceAttributes_t attribs = {0};
+* attribs.version = NVTX_VERSION;
+* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
+* \endcode
+*
+* \par Method 2
+* Initializing nvtxResourceAttributes_v0 for a specific version:
+* \code
+* nvtxResourceAttributes_v0 attribs = {0};
+* attribs.version = 2;
+* attribs.size = (uint16_t)(sizeof(nvtxResourceAttributes_v0));
+* \endcode
+*
+* If the caller uses Method 1 it is critical that the entire binary
+* layout of the structure be configured to 0 so that all fields
+* are initialized to the default value.
+*
+* The caller should either use both NVTX_VERSION and
+* NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
+* and a versioned type (Method 2).  Using a mix of the two methods
+* will likely cause either source level incompatibility or binary
+* incompatibility in the future.
+*
+* \par Example
+* Register a resource and populate its attributes:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
+*
+* // Initialize
+* nvtxResourceAttributes_t attribs = {0};
+* attribs.version = NVTX_VERSION;
+* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
+*
+* // Configure the Attributes
+* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
+* attribs.identifier.pValue = (const void*)pMutex;
+* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* attribs.message.ascii = "Single thread access to database.";
+*
+* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
+* \endcode
+*
+* \sa
+* ::nvtxDomainResourceCreate
+*/
+typedef struct nvtxResourceAttributes_v0
+{
+    /**
+    * \brief Version flag of the structure.
+    *
+    * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
+    * supported in this header file. This can optionally be overridden to
+    * another version of the tools extension library.
+    */
+    uint16_t version;
+    /**
+    * \brief Size of the structure.
+    *
+    * Needs to be set to the size in bytes of this attribute
+    * structure.
+    */
+    uint16_t size;
+    /**
+    * \brief Identifier type specifies how to interpret the identifier field
+    *
+    * Defines the identifier format of the attribute structure's \ref RESOURCE_IDENTIFIER_FIELD
+    * "identifier" field.
+    *
+    * Default Value is NVTX_RESOURCE_TYPE_UNKNOWN
+    */
+    int32_t identifierType;            /* values from enums following the pattern nvtxResource[name]Type_t */
+    /**
+    * \brief Identifier for the resource.
+    * \anchor RESOURCE_IDENTIFIER_FIELD
+    *
+    * An identifier may be a pointer or a handle to an OS or middleware API object.
+    * The resource type will assist in avoiding collisions where handle values may collide.
+    *
+    * The two union members share storage; identifierType tells the reader
+    * how the stored value is to be interpreted.
+    */
+    union identifier_t
+    {
+        /** Identifier expressed as a pointer. */
+        const void* pValue;
+        /** Identifier expressed as a 64-bit unsigned integer. */
+        uint64_t ullValue;
+    } identifier;
+    /** \brief Message type specified in this attribute structure.
+    *
+    * Defines the message format of the attribute structure's \ref RESOURCE_MESSAGE_FIELD
+    * "message" field.
+    *
+    * Default Value is NVTX_MESSAGE_UNKNOWN
+    */
+    int32_t messageType;            /* nvtxMessageType_t */
+    /** \brief Message assigned to this attribute structure. \anchor RESOURCE_MESSAGE_FIELD
+    *
+    * The text message that is attached to a resource.
+    */
+    nvtxMessageValue_t message;
+} nvtxResourceAttributes_v0;
+/** Unversioned name used by the API declarations; refers to the v0 layout. */
+typedef struct nvtxResourceAttributes_v0 nvtxResourceAttributes_t;
+/** \cond SHOW_HIDDEN
+* \version NVTX_VERSION_2
+*/
+/* Size in bytes of nvtxResourceAttributes_v0, cast to uint16_t to match the
+ * type of that structure's `size` field. */
+#define NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (NVTX_STATIC_CAST(uint16_t, sizeof(nvtxResourceAttributes_v0)))
+/* Handle type returned by nvtxDomainResourceCreate and accepted by
+ * nvtxDomainResourceDestroy. */
+typedef struct nvtxResourceHandle* nvtxResourceHandle_t;
+/** \endcond */
+/* ------------------------------------------------------------------------- */
+/** \brief Create a resource object to track and associate data with OS and middleware objects
+*
+* Allows users to associate an API handle or pointer with a user-provided name.
+*
+*
+* \param domain - Domain to own the resource object
+* \param attribs - Attributes to be associated with the resource
+*
+* \return A handle that represents the newly created resource object.
+*
+* \par Example
+* Register a resource:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
+* nvtxResourceAttributes_t attribs = {0};
+* attribs.version = NVTX_VERSION;
+* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
+* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
+* attribs.identifier.pValue = (const void*)pMutex;
+* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* attribs.message.ascii = "Single thread access to database.";
+* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
+* \endcode
+*
+* \sa
+* ::nvtxResourceAttributes_t
+* ::nvtxDomainResourceDestroy
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Destroy a resource object that was used to track and associate data with OS and middleware objects
+*
+* Unregisters a resource object that was created with ::nvtxDomainResourceCreate.
+*
+* \param resource - Handle to the resource in which to operate.
+*
+* \par Example
+* Unregister a resource:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
+* nvtxResourceAttributes_t attribs = {0};
+* attribs.version = NVTX_VERSION;
+* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
+* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
+* attribs.identifier.pValue = (const void*)pMutex;
+* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* attribs.message.ascii = "Single thread access to database.";
+* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
+* // ...
+* nvtxDomainResourceDestroy(handle);
+* \endcode
+*
+* \sa
+* ::nvtxDomainResourceCreate
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource);
+/** @} */
+/** \name Functions for NVTX Category Naming*/
+/* ------------------------------------------------------------------------- */
+/**
+* \brief Annotate an NVTX category used within a domain.
+*
+* Categories are used to group sets of events. Each category is identified
+* through a unique ID and that ID is passed into any of the marker/range
+* events to assign that event to a specific category. The nvtxDomainNameCategory
+* function calls allow the user to assign a name to a category ID that is
+* specific to the domain.
+*
+* nvtxDomainNameCategory(NULL, category, name) is equivalent to calling
+* nvtxNameCategory(category, name).
+*
+* \param domain    - The domain of scoping the category.
+* \param category  - The category ID to name.
+* \param name      - The name of the category.
+*
+* \remarks The category names are tracked per domain.
+*
+* \par Example
+* Assign names to categories in a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("example");
+* nvtxDomainNameCategoryA(domain, 1, "Memory Allocation");
+* nvtxDomainNameCategoryW(domain, 2, L"Memory Transfer");
+* \endcode
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
+/** @} */
+/** \brief Annotate an NVTX category.
+ *
+ * Categories are used to group sets of events. Each category is identified
+ * through a unique ID and that ID is passed into any of the marker/range
+ * events to assign that event to a specific category. The nvtxNameCategory
+ * function calls allow the user to assign a name to a category ID.
+ *
+ * \param category - The category ID to name.
+ * \param name     - The name of the category.
+ *
+ * \remarks The category names are tracked per process.
+ *
+ * \par Example
+ * Assign names to categories:
+ * \code
+ * nvtxNameCategory(1, "Memory Allocation");
+ * nvtxNameCategory(2, "Memory Transfer");
+ * nvtxNameCategory(3, "Memory Object Lifetime");
+ * \endcode
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name);
+/** @} */
+/** \name Functions for OS Threads Naming*/
+/* ------------------------------------------------------------------------- */
+/** \brief Annotate an OS thread.
+ *
+ * Allows the user to name an active thread of the current process. If an
+ * invalid thread ID is provided or a thread ID from a different process is
+ * used the behavior of the tool is implementation dependent.
+ *
+ * Tools expect thread ID to be a number that uniquely identifies the thread
+ * at the time of the call. Note that a thread's ID can be reused after
+ * it is destroyed. Tools may choose how to handle aliasing of thread IDs.
+ *
+ * POSIX pthread_t type returned by pthread_self() may not comply with these
+ * expectations. Please use OS-specific thread ID instead of pthread_t.
+ *
+ * The thread name is associated to the default domain.  To support domains
+ * use resource objects via ::nvtxDomainResourceCreate.
+ *
+ * \param threadId - The ID of the thread to name.
+ * \param name     - The name of the thread.
+ *
+ * \par Examples
+ * Name a thread based on the given operating system:
+ *
+ * Windows:
+ * \code
+ * #include <windows.h>
+ * nvtxNameOsThread(GetCurrentThreadId(), "Current thread");
+ * nvtxNameOsThread(GetThreadId(SomeThreadHandle), "Other thread");
+ * \endcode
+ *
+ * Android:
+ * \code
+ * #include <unistd.h>
+ * nvtxNameOsThreadA(gettid(), "Current thread");
+ * nvtxNameOsThreadA(getpid(), "Main thread");
+ * \endcode
+ *
+ * Linux:
+ * \code
+ * #include <sys/syscall.h>
+ * nvtxNameOsThreadA(syscall(SYS_gettid), "Current thread");
+ * \endcode
+ * \code
+ * #include <unistd.h>
+ * nvtxNameOsThreadA(getpid(), "Main thread");
+ * \endcode
+ *
+ * macOS:
+ * \code
+ * #include <sys/syscall.h>
+ * nvtxNameOsThreadA(syscall(SYS_thread_selfid), "Current thread");
+ * \endcode
+ * \code
+ * #include <pthread.h>
+ * __uint64_t id;
+ * pthread_threadid_np(pthread_self(), &id);
+ * nvtxNameOsThreadA(id, "Current thread");
+ * pthread_threadid_np(somePThreadId, &id);
+ * nvtxNameOsThreadA(id, "Other thread");
+ * \endcode
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name);
+/** @} */
+/** @} */ /*END defgroup*/
+/* ========================================================================= */
+/** \defgroup STRING_REGISTRATION String Registration
+*
+* Registered strings are intended to increase performance by lowering instrumentation
+* overhead.  A string may be registered once and the handle may be passed in place of
+* the string wherever the APIs allow.
+*
+* See \ref STRING_REGISTRATION for more details
+*
+* @{
+*/
+/* ------------------------------------------------------------------------- */
+/** \brief Register a string.
+* Registers an immutable string with NVTX. Once registered, the handle returned
+* for the string can be used in nvtxEventAttributes_t
+* \ref MESSAGE_FIELD. This allows the NVTX implementation to skip copying the
+* contents of the message on each event invocation.
+*
+* String registration is an optimization. It is recommended to use string
+* registration if the string will be passed to an event many times.
+*
+* Strings are not unregistered individually; they are unregistered only by unregistering the entire domain.
+*
+* \param domain  - Domain handle. If NULL then the global domain is used.
+* \param string    - A unique pointer to a sequence of characters.
+*
+* \return A handle representing the registered string.
+*
+* \par Example
+* Register a string:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
+* nvtxStringHandle_t message = nvtxDomainRegisterStringA(domain, "registered string");
+* nvtxEventAttributes_t eventAttrib = {0};
+* eventAttrib.version = NVTX_VERSION;
+* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
+* eventAttrib.message.registered = message;
+* \endcode
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string);
+NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string);
+/** @} */
+/** @} */ /*END defgroup*/
+/* ========================================================================= */
+/** \defgroup DOMAINS Domains
+*
+* Domains are used to group events to a developer defined scope. Middleware
+* vendors may also scope their own events to avoid collisions with
+* the application developer's events, so that the application developer may
+* inspect both parts and easily differentiate or filter them.  By default
+* all events are scoped to a global domain where NULL is provided or when
+* using APIs provided by versions of NVTX below v2.
+*
+* Domains are intended to be typically long lived objects with the intention
+* of logically separating events of large modules from each other such as
+* middleware libraries from each other and the main application.
+*
+* See \ref DOMAINS for more details
+*
+* @{
+*/
+/* ------------------------------------------------------------------------- */
+/** \brief Register a NVTX domain.
+*
+* Domains are used to scope annotations. All NVTX_VERSION_0 and NVTX_VERSION_1
+* annotations are scoped to the global domain. The function nvtxDomainCreate
+* creates a new named domain.
+*
+* Each domain maintains its own nvtxRangePush and nvtxRangePop stack.
+*
+* \param name - A unique string representing the domain.
+*
+* \return A handle representing the domain.
+*
+* \par Example
+* Create a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
+*
+* nvtxMarkA("nvtxMarkA to global domain");
+*
+* nvtxEventAttributes_t eventAttrib1 = {0};
+* eventAttrib1.version = NVTX_VERSION;
+* eventAttrib1.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib1.message.ascii = "nvtxDomainMarkEx to global domain";
+* nvtxDomainMarkEx(NULL, &eventAttrib1);
+*
+* nvtxEventAttributes_t eventAttrib2 = {0};
+* eventAttrib2.version = NVTX_VERSION;
+* eventAttrib2.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+* eventAttrib2.message.ascii = "nvtxDomainMarkEx to com.nvidia.nvtx.example";
+* nvtxDomainMarkEx(domain, &eventAttrib2);
+*
+* nvtxDomainDestroy(domain);
+* \endcode
+*
+* \sa
+* ::nvtxDomainDestroy
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* name);
+NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Unregister a NVTX domain.
+*
+* Unregisters the domain handle and frees all domain specific resources.
+*
+* \param domain    - the domain handle
+*
+* \par Example
+* Destroy a domain:
+* \code
+* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
+* // ...
+* nvtxDomainDestroy(domain);
+* \endcode
+*
+* \sa
+* ::nvtxDomainCreateA
+* ::nvtxDomainCreateW
+*
+* \version NVTX_VERSION_2
+* @{ */
+NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain);
+/** @} */
+/** @} */ /*END defgroup*/
+/* ========================================================================= */
+/** \cond SHOW_HIDDEN */
+/* Character-set neutral aliases: each unsuffixed name expands to the
+ * wide-character (W) function when UNICODE is defined, and to the
+ * narrow-character (A) function otherwise. */
+#ifdef UNICODE
+    #define nvtxMark            nvtxMarkW
+    #define nvtxRangeStart      nvtxRangeStartW
+    #define nvtxRangePush       nvtxRangePushW
+    #define nvtxNameCategory    nvtxNameCategoryW
+    #define nvtxNameOsThread    nvtxNameOsThreadW
+    /* NVTX_VERSION_2 */
+    #define nvtxDomainCreate         nvtxDomainCreateW
+    #define nvtxDomainRegisterString nvtxDomainRegisterStringW
+    #define nvtxDomainNameCategory   nvtxDomainNameCategoryW
+#else
+    #define nvtxMark            nvtxMarkA
+    #define nvtxRangeStart      nvtxRangeStartA
+    #define nvtxRangePush       nvtxRangePushA
+    #define nvtxNameCategory    nvtxNameCategoryA
+    #define nvtxNameOsThread    nvtxNameOsThreadA
+    /* NVTX_VERSION_2 */
+    #define nvtxDomainCreate         nvtxDomainCreateA
+    #define nvtxDomainRegisterString nvtxDomainRegisterStringA
+    #define nvtxDomainNameCategory   nvtxDomainNameCategoryA
+#endif
+/** \endcond */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#define NVTX_IMPL_GUARD /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxTypes.h"
+#ifndef NVTX_NO_IMPL
+#include "nvtxDetail/nvtxImpl.h"
+#endif /*NVTX_NO_IMPL*/
+#undef NVTX_IMPL_GUARD
+#endif /* !defined(NVTX_VERSION) */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCounters.h ADDED Viewed

	@@ -0,0 +1,311 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExtPayload.h"
+/**
+ * \brief The compatibility ID is used for versioning of this extension.
+ */
+#ifndef NVTX_EXT_COUNTERS_COMPATID
+#define NVTX_EXT_COUNTERS_COMPATID 0x0102
+#endif
+/**
+ * \brief The module ID identifies the payload extension. It has to be unique
+ * among the extension modules.
+ */
+#ifndef NVTX_EXT_COUNTERS_MODULEID
+#define NVTX_EXT_COUNTERS_MODULEID 4
+#endif
+#ifndef NVTX_COUNTER_IDS_V1
+#define NVTX_COUNTER_IDS_V1
+/** The counter ID is not specified. */
+#define NVTX_COUNTER_ID_NONE          0
+/** Static (user-provided, feed-forward) counter (group) IDs. */
+#define NVTX_COUNTER_ID_STATIC_START  (1 << 24)
+/** Dynamically (tool) generated counter (group) IDs */
+#define NVTX_COUNTER_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
+#endif /* NVTX_COUNTER_IDS_V1 */
+/** Reasons for the missing sample value. */
+#ifndef NVTX_COUNTER_SAMPLES_V1
+#define NVTX_COUNTER_SAMPLES_V1
+#define NVTX_COUNTER_SAMPLE_ZERO        0
+#define NVTX_COUNTER_SAMPLE_UNCHANGED   1
+#define NVTX_COUNTER_SAMPLE_UNAVAILABLE 2 /* Failed to get a counter sample. */
+#endif /* NVTX_COUNTER_SAMPLES_V1 */
+/**
+ * Counter batch timestamp array flags.
+ * Values must not overlap with `NVTX_BATCH_FLAG_*`.
+ * By default, one timestamp per sample is assumed.
+ *
+ * The flags occupy bits 32 and above and are meant for the 64-bit `flags`
+ * field of `nvtxCounterBatch_t`. The shifted operand is cast to `uint64_t`
+ * first: shifting a plain `int` constant by 32 is undefined behavior when
+ * `int` is 32 bits wide.
+ */
+#ifndef NVTX_COUNTER_BATCH_FLAGS_V1
+#define NVTX_COUNTER_BATCH_FLAGS_V1
+#define NVTX_COUNTER_BATCH_FLAG_BEGINTIME_INTERVAL_PAIR (NVTX_STATIC_CAST(uint64_t, 1) << 32)
+#define NVTX_COUNTER_BATCH_FLAG_ENDTIME_INTERVAL_PAIR   (NVTX_STATIC_CAST(uint64_t, 2) << 32)
+#endif /* NVTX_COUNTER_BATCH_FLAGS_V1 */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifndef NVTX_COUNTER_TYPEDEFS_V1
+#define NVTX_COUNTER_TYPEDEFS_V1
+/**
+ * \brief Attributes of a counter or counter group.
+ *
+ * A pointer to this structure is passed to `nvtxCounterRegister`.
+ */
+typedef struct nvtxCounterAttr_v1
+{
+    /**
+     * NOTE(review): presumably the size of this structure in bytes, filled in
+     * by the caller (e.g. `sizeof(nvtxCounterAttr_t)`) - confirm.
+     */
+    size_t structSize;
+    /**
+     * A schema ID referring to the data layout of the counter group or a
+     * predefined NVTX payloads number type.
+     */
+    uint64_t schemaId;
+    /** Name of the counter (group). */
+    const char* name;
+    /**
+     * Optional detailed description of the counter (group). A description for
+     * individual counters can be set in the schema registration.
+     */
+    const char* description;
+    /**
+     * Identifier of the counters' scope. A valid scope ID is either a
+     * predefined scope or the value returned by `nvtxScopeRegister` called for
+     * the same NVTX domain as `nvtxCounterRegister`. An invalid scope ID will
+     * be handled like `NVTX_SCOPE_NONE`.
+     */
+    uint64_t scopeId;
+    /**
+     * Optional semantics for a counter (group). The specified semantics apply
+     * to all counters in a group. If the semantics should only refer to a
+     * single counter in a group, the semantics field of the payload entry has
+     * to be used. Accepted semantics are `nvtxSemanticsCounter_t` and
+     * `nvtxSemanticsTime_t`.
+     */
+    const nvtxSemanticsHeader_t* semantics;
+    /**
+     * A static counter ID must be unique within the domain,
+     * >= NVTX_COUNTER_ID_STATIC_START, and < NVTX_COUNTER_ID_DYNAMIC_START.
+     * Use NVTX_COUNTER_ID_NONE to let the tool create a (dynamic) counter ID.
+     */
+    uint64_t counterId;
+} nvtxCounterAttr_t;
+/**
+ * \brief Helper struct to submit a batch of counters.
+ *
+ * The size of one sample is specified via the `payloadStaticSize` field of the
+ * counter's data layout schema or the size of the predefined payload entry type
+ * and must include padding. There should be no remainder when dividing
+ * `countersSize` by `nvtxPayloadSchemaAttr_t::payloadStaticSize`.
+ */
+typedef struct nvtxCounterBatch_v1
+{
+    /**
+     * Identifier of a counter group (data layout, scope, etc.). All counter
+     * samples in the batch have the same layout and size.
+     */
+    uint64_t counterId;
+    /** Batch of counter (group) samples. */
+    const void* counters;
+    /** Size of the counter batch (in bytes). */
+    size_t countersSize;
+    /**
+     * Timestamp ordering, timestamp style, etc.
+     * See `NVTX_BATCH_FLAG_*` and `NVTX_COUNTER_BATCH_FLAG_*`.
+     */
+    uint64_t flags;
+    /**
+     * Array of timestamps or a timestamp/interval pair. This field can be
+     * `NULL`, if timestamps are included in the counter samples as part of the
+     * counter group layout. By default, one timestamp per sample is assumed.
+     * The timestamp source is specified via time semantics passed during the
+     * counter group registration.
+     * This overrides the timestamps embedded in counter samples.
+     */
+    const int64_t* timestamps;
+    /** Size of the timestamps array or timestamp/interval pair (in bytes). */
+    size_t timestampsSize;
+} nvtxCounterBatch_t;
+#endif /* NVTX_COUNTER_TYPEDEFS_V1 */
+#ifndef NVTX_COUNTER_API_FUNCTIONS_V1
+#define NVTX_COUNTER_API_FUNCTIONS_V1
+/**
+ * \brief Register a counter (group).
+ *
+ * @param hDomain NVTX domain handle.
+ * @param attr Pointer to the attributes of the counter (group).
+ *
+ * @return Identifier of a counter (group). The counter ID is unique within
+ *         the NVTX domain.
+ */
+NVTX_DECLSPEC uint64_t NVTX_API nvtxCounterRegister(
+    nvtxDomainHandle_t hDomain,
+    const nvtxCounterAttr_t* attr);
+/**
+ * Sample one integer counter by value immediately
+ * (the NVTX tool determines the timestamp).
+ *
+ * @param hDomain handle of the NVTX domain.
+ * @param counterId identifier of the NVTX counter (group).
+ * @param value 64-bit integer counter value.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxCounterSampleInt64(
+    nvtxDomainHandle_t hDomain,
+    uint64_t counterId,
+    int64_t value);
+/**
+ * Sample one floating point counter by value immediately
+ * (the NVTX tool determines the timestamp).
+ *
+ * @param hDomain handle of the NVTX domain.
+ * @param counterId identifier of the NVTX counter (group).
+ * @param value 64-bit floating-point counter value.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxCounterSampleFloat64(
+    nvtxDomainHandle_t hDomain,
+    uint64_t counterId,
+    double value);
+/**
+ * Sample a counter (group) by reference immediately
+ * (the NVTX tool determines the timestamp).
+ *
+ * @param hDomain handle of the NVTX domain.
+ * @param counterId identifier of the NVTX counter (group).
+ * @param value pointer to one or more counter values.
+ * @param size size of the counter value(s) in bytes.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxCounterSample(
+    nvtxDomainHandle_t hDomain,
+    uint64_t counterId,
+    const void* value,
+    size_t size);
+/**
+ * \brief Sample without value.
+ *
+ * @param hDomain handle of the NVTX domain.
+ * @param counterId identifier of the NVTX counter (group).
+ * @param reason reason for the missing sample value.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxCounterSampleNoValue(
+    nvtxDomainHandle_t hDomain,
+    uint64_t counterId,
+    uint8_t reason);
+/**
+ * \brief Submit a batch of counters in the given domain.
+ *
+ * The size of a data sampling point is defined by the `payloadStaticSize` field
+ * of the payload schema. An NVTX tool can assume that the counter samples are
+ * stored as an array with each entry being `payloadStaticSize` bytes.
+ *
+ * @param hDomain handle of the NVTX domain
+ * @param counterData Pointer to the counter data to be submitted.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxCounterBatchSubmit(
+    nvtxDomainHandle_t hDomain,
+    const nvtxCounterBatch_t* counterData);
+#endif /* NVTX_COUNTER_API_FUNCTIONS_V1 */
+#ifndef NVTX_COUNTER_CALLBACK_ID_V1
+#define NVTX_COUNTER_CALLBACK_ID_V1
+#define NVTX3EXT_CBID_nvtxCounterRegister           0
+#define NVTX3EXT_CBID_nvtxCounterSampleInt64        1
+#define NVTX3EXT_CBID_nvtxCounterSampleFloat64      2
+#define NVTX3EXT_CBID_nvtxCounterSample             3
+#define NVTX3EXT_CBID_nvtxCounterSampleNoValue      4
+#define NVTX3EXT_CBID_nvtxCounterBatchSubmit        5
+#endif /* NVTX_COUNTER_CALLBACK_ID_V1 */
+/*
+ * Macros to create versioned symbols.
+ *
+ * NVTX_EXT_COUNTERS_VERSIONED_ID(NAME) produces the identifier
+ * NAME_v<NVTX_VERSION>_cnt<NVTX_EXT_COUNTERS_COMPATID>.
+ *
+ * Two levels are needed because operands of ## are not macro-expanded:
+ * the _L2 macro receives NVTX_VERSION and NVTX_EXT_COUNTERS_COMPATID as
+ * ordinary arguments, so they are expanded to their values before _L3
+ * pastes the tokens together.
+ */
+#ifndef NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIERS_V1
+#define NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIERS_V1
+#define NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \
+    NAME##_v##VERSION##_cnt##COMPATID
+#define NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \
+    NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID)
+#define NVTX_EXT_COUNTERS_VERSIONED_ID(NAME) \
+    NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_COUNTERS_COMPATID)
+#endif /* NVTX_EXT_COUNTERS_VERSIONED_IDENTIFIERS_V1 */
+#ifdef __GNUC__
+#pragma GCC visibility push(internal)
+#endif
+#define NVTX_EXT_TYPES_GUARD /* Ensure other headers cannot be included directly. */
+#include "nvtxDetail/nvtxExtTypes.h"
+#undef NVTX_EXT_TYPES_GUARD
+#ifndef NVTX_NO_IMPL
+#define NVTX_EXT_IMPL_COUNTERS_GUARD /* Ensure other headers cannot be included directly. */
+#include "nvtxDetail/nvtxExtImplCounters_v1.h"
+#undef NVTX_EXT_IMPL_COUNTERS_GUARD
+#endif /*NVTX_NO_IMPL*/
+#ifdef __GNUC__
+#pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCuda.h ADDED Viewed

	@@ -0,0 +1,164 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExt.h"
+#include "cuda.h"
+#ifndef NVTOOLSEXT_CUDA_V3
+#define NVTOOLSEXT_CUDA_V3
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* ========================================================================= */
+/** \name Functions for CUDA Resource Naming
+*/
+/** \addtogroup RESOURCE_NAMING
+ * \section RESOURCE_NAMING_CUDA CUDA Resource Naming
+ *
+ * This section covers the API functions that allow to annotate CUDA resources
+ * with user-provided names.
+ *
+ * @{
+ */
+/*  ------------------------------------------------------------------------- */
+/** \cond SHOW_HIDDEN
+* \brief Class number for CUDA resource types; passed as the CUDA class to
+* NVTX_RESOURCE_MAKE_TYPE to build resource type values that are kept
+* separate from those of other resource classes.
+* \version NVTX_VERSION_2
+*/
+#define NVTX_RESOURCE_CLASS_CUDA  4
+/** \endcond */
+/*  ------------------------------------------------------------------------- */
+/** \brief Resource types for CUDA
+*/
+typedef enum nvtxResourceCUDAType_t
+{
+    NVTX_RESOURCE_TYPE_CUDA_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDA, 1), /* CUdevice */
+    NVTX_RESOURCE_TYPE_CUDA_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 2), /* CUcontext */
+    NVTX_RESOURCE_TYPE_CUDA_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDA, 3), /* CUstream */
+    NVTX_RESOURCE_TYPE_CUDA_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 4) /* CUevent */
+} nvtxResourceCUDAType_t;
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA device.
+ *
+ * Allows the user to associate a CUDA device with a user-provided name.
+ *
+ * \param device - The handle of the CUDA device to name.
+ * \param name   - The name of the CUDA device.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA context.
+ *
+ * Allows the user to associate a CUDA context with a user-provided name.
+ *
+ * \param context - The handle of the CUDA context to name.
+ * \param name    - The name of the CUDA context.
+ *
+ * \par Example
+ * Name a CUDA context:
+ * \code
+ * CUresult status = cuCtxCreate( &cuContext, 0, cuDevice );
+ * if ( CUDA_SUCCESS != status )
+ *     goto Error;
+ * nvtxNameCuContext(cuContext, "CTX_NAME");
+ * \endcode
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA stream.
+ *
+ * Allows the user to associate a CUDA stream with a user-provided name.
+ *
+ * The `A` variant takes a narrow (`const char*`) name; the `W` variant takes a
+ * wide (`const wchar_t*`) name.
+ *
+ * \param stream - The handle of the CUDA stream to name.
+ * \param name   - The name of the CUDA stream.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA event.
+ *
+ * Allows the user to associate a CUDA event with a user-provided name.
+ *
+ * The `A` variant takes a narrow (`const char*`) name; the `W` variant takes a
+ * wide (`const wchar_t*`) name.
+ *
+ * \param event - The handle of the CUDA event to name.
+ * \param name  - The name of the CUDA event.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name);
+/** @} */
+/** @} */ /* END RESOURCE_NAMING */
+/* ========================================================================= */
+/* Map the unsuffixed CUDA driver resource-naming names onto the wide-character
+ * (W) functions when UNICODE is defined, and onto the narrow-character (A)
+ * functions otherwise. */
+#ifdef UNICODE
+  #define nvtxNameCuDevice   nvtxNameCuDeviceW
+  #define nvtxNameCuContext  nvtxNameCuContextW
+  #define nvtxNameCuStream   nvtxNameCuStreamW
+  #define nvtxNameCuEvent    nvtxNameCuEventW
+#else
+  #define nvtxNameCuDevice   nvtxNameCuDeviceA
+  #define nvtxNameCuContext  nvtxNameCuContextA
+  #define nvtxNameCuStream   nvtxNameCuStreamA
+  #define nvtxNameCuEvent    nvtxNameCuEventA
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#ifndef NVTX_NO_IMPL
+#define NVTX_IMPL_GUARD_CUDA /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxImplCuda_v3.h"
+#undef NVTX_IMPL_GUARD_CUDA
+#endif /*NVTX_NO_IMPL*/
+#endif /* NVTOOLSEXT_CUDA_V3 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtCudaRt.h ADDED Viewed

	@@ -0,0 +1,139 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExt.h"
+#include "cuda.h"
+#include "driver_types.h"
+#ifndef NVTOOLSEXT_CUDART_V3
+#define NVTOOLSEXT_CUDART_V3
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* ========================================================================= */
+/** \name Functions for CUDA Resource Naming
+*/
+/** \addtogroup RESOURCE_NAMING
+ * \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
+ *
+ * This section covers the API functions that allow annotating CUDA resources
+ * with user-provided names.
+ *
+ * @{
+ */
+/*  ------------------------------------------------------------------------- */
+/** \cond SHOW_HIDDEN
+* \brief Used to build a non-colliding value for resource types separated by class
+* \version NVTX_VERSION_2
+*/
+#define NVTX_RESOURCE_CLASS_CUDART 5
+/** \endcond */
+/*  ------------------------------------------------------------------------- */
+/** \brief Resource types for CUDART
+*/
+typedef enum nvtxResourceCUDARTType_t
+{
+    NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /* int device */
+    NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /* cudaStream_t */
+    NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2) /* cudaEvent_t */
+} nvtxResourceCUDARTType_t;
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA device.
+ *
+ * Allows the user to associate a CUDA device with a user-provided name.
+ *
+ * The `A` variant takes a narrow (`const char*`) name; the `W` variant takes a
+ * wide (`const wchar_t*`) name.
+ *
+ * \param device - The id of the CUDA device to name.
+ * \param name   - The name of the CUDA device.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA stream.
+ *
+ * Allows the user to associate a CUDA stream with a user-provided name.
+ *
+ * The `A` variant takes a narrow (`const char*`) name; the `W` variant takes a
+ * wide (`const wchar_t*`) name.
+ *
+ * \param stream - The handle of the CUDA stream to name.
+ * \param name   - The name of the CUDA stream.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates a CUDA event.
+ *
+ * Allows the user to associate a CUDA event with a user-provided name.
+ *
+ * The `A` variant takes a narrow (`const char*`) name; the `W` variant takes a
+ * wide (`const wchar_t*`) name.
+ *
+ * \param event - The handle of the CUDA event to name.
+ * \param name  - The name of the CUDA event.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
+/** @} */
+/** @} */ /* END RESOURCE_NAMING */
+/* ========================================================================= */
+/* Map the unsuffixed CUDA runtime resource-naming names onto the wide-character
+ * (W) functions when UNICODE is defined, and onto the narrow-character (A)
+ * functions otherwise. */
+#ifdef UNICODE
+  #define nvtxNameCudaDevice nvtxNameCudaDeviceW
+  #define nvtxNameCudaStream nvtxNameCudaStreamW
+  #define nvtxNameCudaEvent  nvtxNameCudaEventW
+#else
+  #define nvtxNameCudaDevice nvtxNameCudaDeviceA
+  #define nvtxNameCudaStream nvtxNameCudaStreamA
+  #define nvtxNameCudaEvent  nvtxNameCudaEventA
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#ifndef NVTX_NO_IMPL
+#define NVTX_IMPL_GUARD_CUDART /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxImplCudaRt_v3.h"
+#undef NVTX_IMPL_GUARD_CUDART
+#endif /*NVTX_NO_IMPL*/
+#endif /* NVTOOLSEXT_CUDART_V3 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMem.h ADDED Viewed

	@@ -0,0 +1,749 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExt.h"
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifndef NVTX_EXT_MODULEID_MEM
+#define NVTX_EXT_MODULEID_MEM 1
+#endif
+/* \cond SHOW_HIDDEN
+ * \brief A compatibility ID value used in structures and initialization to
+ * identify version differences.
+ */
+#ifndef NVTX_EXT_COMPATID_MEM
+#define NVTX_EXT_COMPATID_MEM 0x0102
+#endif
+/* \endcond
+ */
+#ifndef NVTX_MEM_CONTENTS_V1
+#define NVTX_MEM_CONTENTS_V1
+/* \cond SHOW_HIDDEN
+ * \brief This value is returned by functions that return `nvtxMemHeapHandle_t`,
+ * if a tool is not attached.
+ */
+#define NVTX_MEM_HEAP_HANDLE_NO_TOOL (NVTX_STATIC_CAST(nvtxMemHeapHandle_t, NVTX_STATIC_CAST(intptr_t, -1)))
+/* \endcond
+ */
+/* \cond SHOW_HIDDEN
+ * \brief This value is returned by functions that return `nvtxMemRegionHandle_t`
+ * if a tool is not attached.
+ */
+#define NVTX_MEM_REGION_HANDLE_NO_TOOL (NVTX_STATIC_CAST(nvtxMemRegionHandle_t, NVTX_STATIC_CAST(intptr_t, -1)))
+/* \endcond
+ */
+/* \cond SHOW_HIDDEN
+ * \brief This value is returned by functions that return `nvtxMemPermissionsHandle_t`
+ * if a tool is not attached.
+ */
+#define NVTX_MEM_PERMISSIONS_HANDLE_NO_TOOL (NVTX_STATIC_CAST(nvtxMemPermissionsHandle_t, -1))
+/* \endcond
+ */
+/* \cond SHOW_HIDDEN
+ * \brief This should not be used and is considered an error but defined to
+ * detect an accidental use of zero or NULL.
+ */
+#define NVTX_MEM_HEAP_USAGE_UNKNOWN 0x0
+/* \endcond
+ */
+/* \cond SHOW_HIDDEN
+ * \brief This should not be used and is considered an error but defined to
+ * detect an accidental use of zero or NULL.
+ */
+#define NVTX_MEM_TYPE_UNKNOWN 0x0
+/* \endcond
+ */
+/*  ------------------------------------------------------------------------- */
+/** \defgroup MEMORY Memory
+ * See page \ref PAGE_MEMORY.
+ * @{
+ */
+/**
+ * \brief To indicate the full process virtual address space as a heap for
+ * functions where a nvtxMemHeapHandle_t is accepted.
+ *
+ * The heap by default is always read-write-execute permissions without creating regions.
+ * Regions created in this heap have read-write access by default but not execute.
+ */
+#define NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemHeapHandle_t, 0))
+/** \brief This heap is a sub-allocator.
+ *
+ * Heap created with this usage should not be accessed by the user until regions are registered.
+ * Regions from a heap with this usage have read-write access by default but not execute.
+ */
+#define NVTX_MEM_HEAP_USAGE_TYPE_SUB_ALLOCATOR 0x1
+/**
+ * \brief This is a heap of memory that has an explicit layout.
+ *
+ * The layout could be static or dynamic (calculated). This often represents an algorithm's
+ * structures that are packed together. By default this heap is assumed to be accessible for
+ * scopes where the memory is naturally accessible by hardware. Regions may be used to further
+ * annotate or restrict access. A tool may have an option to be more strict, but special
+ * consideration must be made for `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`.
+ *
+ * The behavior of this usage is similar to NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE but
+ * a tool can use it to track special behaviors and reservation.
+ *
+ * Memory in a heap with this usage has read-write permissions by default but not execute without
+ * creating regions. Regions created in this heap have the same default permission access.
+ */
+#define NVTX_MEM_HEAP_USAGE_TYPE_LAYOUT 0x2
+/**
+ * \brief Standard process userspace virtual addresses for linear allocations.
+ *
+ * APIs that map into this space, such as CUDA UVA should use this type.
+ *
+ * Relevant functions: cudaMalloc, cudaMallocManaged, cudaHostAlloc, cudaMallocHost
+ * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is supported
+ *
+ * nvtxMemHeapRegister receives a heapDesc of type nvtxMemVirtualRangeDesc_t
+ */
+#define NVTX_MEM_TYPE_VIRTUAL_ADDRESS 0x1
+/**
+ * \brief To indicate you are modifying permissions to the process-wide
+ * full virtual address space.
+ *
+ * This is a companion object to `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`.
+ */
+#define NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemPermissionsHandle_t, 0))
+#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_NONE 0x0
+#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_READ 0x1
+#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE 0x2
+#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_ATOMIC 0x4
+/* \cond SHOW_HIDDEN
+ * \brief Forward declaration of opaque memory heap structure.
+ */
+struct nvtxMemHeap_v1;
+typedef struct nvtxMemHeap_v1 nvtxMemHeap_t;
+/* \endcond
+ */
+/** \brief A handle returned by a tool to represent a memory heap. */
+typedef nvtxMemHeap_t* nvtxMemHeapHandle_t;
+/* \cond SHOW_HIDDEN
+ * \brief Forward declaration of opaque memory region structure.
+ */
+struct nvtxMemRegion_v1;
+typedef struct nvtxMemRegion_v1 nvtxMemRegion_t;
+/* \endcond
+ */
+/** \brief A handle returned by a tool to represent a memory region. */
+typedef nvtxMemRegion_t* nvtxMemRegionHandle_t;
+/** \brief A reference to a memory region (by pointer or handle).
+ *
+ * The union itself does not record which member is in use. That is determined
+ * by a separate type or flag field outside the union; the structures in this
+ * header that embed or point to a nvtxMemRegionRef_t carry a field commented
+ * as NVTX_MEM_REGION_REF_TYPE_* alongside it.
+ */
+typedef union nvtxMemRegionRef_t
+{
+    void const* pointer; /* Region referenced by pointer. */
+    nvtxMemRegionHandle_t handle; /* Region referenced by handle. */
+} nvtxMemRegionRef_t;
+/* \cond SHOW_HIDDEN
+ * \brief Forward declaration of opaque memory permissions structure
+ */
+struct nvtxMemPermissions_v1;
+typedef struct nvtxMemPermissions_v1 nvtxMemPermissions_t;
+/* \endcond
+ */
+/** \brief A handle returned by a tool to represent a memory permissions mask. */
+typedef nvtxMemPermissions_t* nvtxMemPermissionsHandle_t;
+/** \brief Describes a range of memory by a size and a pointer.
+ *
+ * Other comments in this header name this as the descriptor type used with
+ * NVTX_MEM_TYPE_VIRTUAL_ADDRESS: a heap description's typeSpecificDesc points
+ * to one, and the region descriptor array used for region registration is an
+ * array of them.
+ */
+typedef struct nvtxMemVirtualRangeDesc_v1
+{
+    size_t  size; /* presumably the length of the range in bytes - TODO confirm */
+    void const*  ptr; /* presumably the start address of the range - TODO confirm */
+} nvtxMemVirtualRangeDesc_v1 ;
+typedef nvtxMemVirtualRangeDesc_v1 nvtxMemVirtualRangeDesc_t;
+/** \brief structure to describe a heap in process virtual memory. */
+typedef struct nvtxMemHeapDesc_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t reserved0;
+    /** \brief Usage characteristics of the heap
+     *
+     * Usage characteristics help tools like memcheckers, sanitizers,
+     * as well as other debugging and profiling tools to determine some
+     * special behaviors they should apply to the heap and its regions.
+     * The value follows the convention NVTX_MEM_HEAP_USAGE_*
+     *
+     * Default Value is 0, which is invalid.
+     */
+    uint32_t usage;
+    /** \brief Memory type characteristics of the heap
+     *
+     * The 'type' indicates how to interpret the ptr field of the heapDesc.
+     * This is intended to support many additional types of memory, beyond
+     * standard process virtual memory, such as API specific memory only
+     * addressed by handles or multi-dimensional memory requiring more complex
+     * descriptions to handle features like strides, tiling, or interlace.
+     *
+     * The value conforms to NVTX_MEM_TYPE_*
+     *
+     * The value in the field 'type' identifies the descriptor type that will
+     * be in the field 'typeSpecificDesc'.  'typeSpecificDesc' is void* because
+     * it is extensible.  Example usage is if type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS,
+     * then typeSpecificDesc points to a nvtxMemVirtualRangeDesc_t.
+     *
+     * Default Value is 0, which is invalid.
+     */
+    uint32_t type;
+    /** \brief size of the heap memory descriptor pointed to by typeSpecificDesc
+     *
+     * Default Value is 0 which is invalid.
+     */
+    size_t typeSpecificDescSize;
+    /** \brief Pointer to the heap memory descriptor
+     *
+     * The value in the field 'type' identifies the descriptor type that will
+     * be in the field 'typeSpecificDesc'.  'typeSpecificDesc' is void* because
+     * it is extensible.  Example usage is if type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS,
+     * then typeSpecificDesc points to a nvtxMemVirtualRangeDesc_t.
+     *
+     * Default Value is 0, which is invalid.
+     */
+    void const* typeSpecificDesc;
+    /** \brief ID of the category the event is assigned to.
+     *
+     * A category is a user-controlled ID that can be used to group
+     * events.  The tool may use category IDs to improve filtering or
+     * enable grouping of events in the same category. The functions
+     * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
+     * to name a category.
+     *
+     * Default Value is 0.
+     */
+    uint32_t category;
+    /** \brief Message type specified in this attribute structure.
+     *
+     * Defines the message format of the attribute structure's \ref MEM_MESSAGE_FIELD
+     * "message" field.
+     *
+     * Default Value is `NVTX_MESSAGE_UNKNOWN`.
+     */
+    uint32_t messageType;            /* nvtxMessageType_t */
+    /** \brief Message assigned to this attribute structure. \anchor MEM_MESSAGE_FIELD
+     *
+     * The text message that is attached to an event.
+     */
+    nvtxMessageValue_t message;
+} nvtxMemHeapDesc_v1 ;
+typedef nvtxMemHeapDesc_v1 nvtxMemHeapDesc_t;
+/**
+ * \brief Create a memory heap to represent an object or range of memory that will be further
+ * sub-divided into regions.
+ *
+ * The handle used to address the heap will depend on the heap's type.  Where the heap is virtual
+ * memory accessible, the address of the heap's memory itself is its handle. This will likewise
+ * be returned from the function.
+ *
+ * For more advanced types, where the heap is not virtual memory accessible, the tools may be
+ * responsible for returning a void const * that uniquely identifies the object. Please see
+ * the description of each heap type for more details on whether this is expected to be uniquely
+ * generated by the tool or otherwise.
+ *
+ * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+ * \param desc   - Description of the heap to create (nvtxMemHeapDesc_t).
+ *
+ * \return A handle for the heap. `NVTX_MEM_HEAP_HANDLE_NO_TOOL` is the value returned if a
+ * tool is not attached.
+ */
+NVTX_DECLSPEC nvtxMemHeapHandle_t NVTX_API nvtxMemHeapRegister(
+    nvtxDomainHandle_t domain,
+    nvtxMemHeapDesc_t const* desc);
+ /** \brief Destroy a memory heap.
+  *
+  * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+  * \param heap   - The heap to destroy. `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE` is not supported.
+  */
+NVTX_DECLSPEC void NVTX_API nvtxMemHeapUnregister(
+    nvtxDomainHandle_t domain,
+    nvtxMemHeapHandle_t heap);/* NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported */
+/**
+ * \brief Reset the memory heap, wiping out any changes as if it were a fresh heap.
+ *
+ * This includes invalidating all regions and their handles.
+ *
+ * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+ * \param heap   - The heap to reset. `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE` is supported.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxMemHeapReset(
+    nvtxDomainHandle_t domain,
+    nvtxMemHeapHandle_t heap); /* NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is supported */
+/**
+ * \brief Batch descriptor for registering regions of memory inside of a heap.
+ *
+ * The `heap` member refers to the heap within which the regions reside. This can be from
+ * `nvtxMemHeapRegister`, `NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE`, or one provided
+ * from other extension API.
+ *
+ * The `regionType` member defines which descriptor type is used in `regionDescElements`.
+ * The most commonly used type is `NVTX_MEM_TYPE_VIRTUAL_ADDRESS`.
+ * In this case `regionDescElements` is an array of `nvtxMemVirtualRangeDesc_t`.
+ *
+ * The `regionCount` member is how many elements are in `regionDescElements` and
+ * `regionHandleElementsOut`.
+ *
+ * The `regionHandleElementsOut` member points to an array where the tool will provide region
+ * handles. If a pointer is provided, it is expected to have `regionCount` elements. This pointer
+ * can be NULL if `regionType` is `NVTX_MEM_TYPE_VIRTUAL_ADDRESS`. In this case, the user can use
+ * the pointer to the virtual memory to reference the region in other related functions which
+ * accept nvtxMemRegionRef_t.
+ */
+typedef struct nvtxMemRegionsRegisterBatch_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t regionType; /* NVTX_MEM_TYPE_* */
+    nvtxMemHeapHandle_t heap;
+    size_t regionCount;
+    size_t regionDescElementSize; /* presumably the size in bytes of one element of regionDescElements - TODO confirm */
+    void const* regionDescElements; /* Array of region descriptors. For virtual-address regions the described pointer can also be used to reference the region. */
+    nvtxMemRegionHandle_t* regionHandleElementsOut; /* Optional output array where the tool provides the region handles. */
+} nvtxMemRegionsRegisterBatch_v1;
+typedef nvtxMemRegionsRegisterBatch_v1 nvtxMemRegionsRegisterBatch_t;
+ /** \brief Register regions of memory inside of a heap of linear process virtual memory.
+  *
+  * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+  * \param desc   - Batch descriptor (nvtxMemRegionsRegisterBatch_t) describing the regions.
+  */
+NVTX_DECLSPEC void NVTX_API nvtxMemRegionsRegister(
+    nvtxDomainHandle_t domain,
+    nvtxMemRegionsRegisterBatch_t const* desc);
+/**
+ * \brief Batch descriptor for resizing regions, passed to `nvtxMemRegionsResize`.
+ *
+ * The `regionType` member defines which descriptor type is used in `regionDescElements`.
+ * The most commonly used type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS.
+ *
+ * The `regionDescCount` member is how many elements are in `regionDescElements`.
+ *
+ * Unlike `nvtxMemRegionsRegisterBatch_t`, this structure has no heap member and no output
+ * array of region handles.
+ *
+ * NOTE(review): the previous text of this comment was a copy of the registration
+ * description and referred to members this structure does not have. How a descriptor
+ * identifies the existing region to resize is not stated in this header - confirm
+ * against the NVTX memory extension documentation.
+ */
+typedef struct nvtxMemRegionsResizeBatch_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t regionType; /* NVTX_MEM_TYPE_* */
+    size_t regionDescCount;
+    size_t regionDescElementSize; /* presumably the size in bytes of one element of regionDescElements - TODO confirm */
+    void const* regionDescElements; /* Array of regionDescCount region descriptors. */
+} nvtxMemRegionsResizeBatch_v1;
+typedef nvtxMemRegionsResizeBatch_v1 nvtxMemRegionsResizeBatch_t;
+ /** \brief Resize regions of memory inside of a heap of linear process virtual memory.
+  *
+  * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+  * \param desc   - Batch descriptor (nvtxMemRegionsResizeBatch_t) describing the regions.
+  */
+NVTX_DECLSPEC void NVTX_API nvtxMemRegionsResize(
+    nvtxDomainHandle_t domain,
+    nvtxMemRegionsResizeBatch_t const* desc);
+/* Values for the fields commented as NVTX_MEM_REGION_REF_TYPE_* in this header.
+ * Presumably POINTER and HANDLE select the `pointer` and `handle` members of
+ * nvtxMemRegionRef_t respectively - TODO confirm. */
+#define NVTX_MEM_REGION_REF_TYPE_UNKNOWN 0x0
+#define NVTX_MEM_REGION_REF_TYPE_POINTER 0x1
+#define NVTX_MEM_REGION_REF_TYPE_HANDLE 0x2
+/**
+ * \brief Batch descriptor for unregistering regions, passed to `nvtxMemRegionsUnregister`.
+ *
+ * The `refType` member is one of NVTX_MEM_REGION_REF_TYPE_*; presumably it states whether
+ * the elements of `refElements` reference regions by pointer or by handle - TODO confirm.
+ *
+ * The `refCount` member is how many elements are in `refElements`.
+ *
+ * NOTE(review): the previous text of this comment was a copy of the registration
+ * description and referred to members (a heap, a region type, an output handle array)
+ * that this structure does not have.
+ */
+typedef struct nvtxMemRegionsUnregisterBatch_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t refType; /* NVTX_MEM_REGION_REF_TYPE_* */
+    size_t refCount; /* count of elements in refElements */
+    size_t refElementSize; /* presumably the size in bytes of one element of refElements - TODO confirm */
+    nvtxMemRegionRef_t const* refElements; /* Array of references (pointer or handle) to the regions. */
+} nvtxMemRegionsUnregisterBatch_v1;
+typedef nvtxMemRegionsUnregisterBatch_v1 nvtxMemRegionsUnregisterBatch_t;
+/**
+ * \brief Unregistration for regions of process virtual memory
+ *
+ * This is not necessary if the heap that contains the regions has already been
+ * destroyed (see `nvtxMemHeapUnregister`).
+ *
+ * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+ * \param desc   - Batch descriptor (nvtxMemRegionsUnregisterBatch_t) listing the regions.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxMemRegionsUnregister(
+    nvtxDomainHandle_t domain,
+    nvtxMemRegionsUnregisterBatch_t const* desc);
+/** \brief Describes one region together with a name for it.
+ *
+ * This is the element type of the `regionElements` array in `nvtxMemRegionsNameBatch_t`.
+ */
+typedef struct nvtxMemRegionNameDesc_v1
+{
+    uint32_t regionRefType; /* NVTX_MEM_REGION_REF_TYPE_* */
+    uint32_t nameType; /* nvtxMessageType_t */
+    nvtxMemRegionRef_t region; /* presumably interpreted according to regionRefType - TODO confirm */
+    nvtxMessageValue_t name; /* presumably interpreted according to nameType - TODO confirm */
+    uint32_t category;
+    uint32_t reserved0;
+} nvtxMemRegionNameDesc_v1;
+typedef nvtxMemRegionNameDesc_v1 nvtxMemRegionNameDesc_t;
+/** \brief Batch descriptor for naming regions, passed to `nvtxMemRegionsName`. */
+typedef struct nvtxMemRegionsNameBatch_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t reserved0;
+    size_t regionCount; /* presumably the number of elements in regionElements - TODO confirm */
+    size_t regionElementSize; /* presumably the size in bytes of one element of regionElements - TODO confirm */
+    nvtxMemRegionNameDesc_t const* regionElements;
+    size_t reserved1;
+} nvtxMemRegionsNameBatch_v1 ;
+typedef nvtxMemRegionsNameBatch_v1 nvtxMemRegionsNameBatch_t;
+ /** \brief Name or rename a region.
+  *
+  * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+  * \param desc   - Batch descriptor (nvtxMemRegionsNameBatch_t) listing regions and names.
+  */
+NVTX_DECLSPEC void NVTX_API nvtxMemRegionsName(
+    nvtxDomainHandle_t domain,
+    nvtxMemRegionsNameBatch_t const* desc);
+/** \brief There are no permissions for this memory. */
+#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_NONE 0x0
+/** \brief The memory is readable. */
+#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_READ 0x1
+/** \brief The memory is writable. */
+#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_WRITE 0x2
+/** \brief The memory is for atomic RW. */
+#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_ATOMIC 0x4
+/**
+ * \brief The memory access permissions are reset for a region.
+ *
+ * This is as if never set, rather than documented defaults.  As a result, any flags
+ * indicating how unspecified regions are handled will affect this area.
+ *
+ * This should not be used with READ, WRITE, nor ATOMIC, as those flags would have no effect.
+ */
+#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_RESET 0x8
+/** \brief Describes one region together with permission flags for it.
+ *
+ * This is the element type of the `regionElements` array in
+ * `nvtxMemPermissionsAssignBatch_t`.
+ */
+typedef struct nvtxMemPermissionsAssignRegionDesc_v1
+{
+    uint32_t flags; /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */
+    uint32_t regionRefType; /* NVTX_MEM_REGION_REF_TYPE_* */
+    nvtxMemRegionRef_t region; /* presumably interpreted according to regionRefType - TODO confirm */
+} nvtxMemPermissionsAssignRegionDesc_v1 ;
+typedef nvtxMemPermissionsAssignRegionDesc_v1 nvtxMemPermissionsAssignRegionDesc_t;
+/** \brief Batch descriptor for assigning permissions, passed to `nvtxMemPermissionsAssign`. */
+typedef struct nvtxMemPermissionsAssignBatch_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t reserved0;
+    nvtxMemPermissionsHandle_t permissions; /* presumably the permissions object being modified - TODO confirm */
+    size_t regionCount; /* presumably the number of elements in regionElements - TODO confirm */
+    size_t regionElementSize; /* presumably the size in bytes of one element of regionElements - TODO confirm */
+    nvtxMemPermissionsAssignRegionDesc_t const* regionElements;
+    size_t reserved1;
+} nvtxMemPermissionsAssignBatch_v1 ;
+typedef nvtxMemPermissionsAssignBatch_v1 nvtxMemPermissionsAssignBatch_t;
+ /** \brief Change the permissions of a region of process virtual memory.
+  *
+  * \param domain - NVTX domain handle (nvtxDomainHandle_t).
+  * \param desc   - Batch descriptor (nvtxMemPermissionsAssignBatch_t) listing regions and flags.
+  */
+NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsAssign(
+    nvtxDomainHandle_t domain,
+    nvtxMemPermissionsAssignBatch_t const* desc);
+/**
+ * \brief Create a permissions object for fine grain thread-local control in
+ * multi-threading scenarios
+ *
+ * Unlike the global permissions object (NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE), a new
+ * permissions object is empty. There are no regions registered to it, so more memory is accessible
+ * if bound(bind) without calls to nvtxMemPermissionsSetAccess* first. The permissions are not
+ * active until nvtxMemPermissionsBind. See `nvtxMemPermissionsBind` for more details.
+ *
+ * Use the flags NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_* to control  how the regions in
+ * this permission object will interact with global permissions when bound. You may choose to
+ * either replace global memory regions setting or overlay on top of them. The most common uses are
+ * as follows:
+ *     * To limit tools to validate writing exclusively specified in this object but inherit all
+ *       global read access regions use `NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE`
+ *     * To limit tools to validate both read & write permissions exclusively specified in this
+ *        object use NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_READ
+ *                   & NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE
+ *
+ * Also see `nvtxMemPermissionsBind` & `nvtxMemPermissionsSetAccess*`.
+ */
+NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemPermissionsCreate(
+    nvtxDomainHandle_t domain,
+    int32_t creationflags); /* NVTX_MEM_PERMISSIONS_CREATE_FLAGS_* */
+/**
+ * \brief Destroy the permissions object.
+ *
+ * If the object is currently bound (see `nvtxMemPermissionsBind`), destroying it also
+ * unbinds it.
+ *
+ * \param domain            - NVTX domain handle (nvtxDomainHandle_t).
+ * \param permissionsHandle - The permissions object to destroy. Only objects obtained from
+ *                            `nvtxMemPermissionsCreate` are supported.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsDestroy(
+    nvtxDomainHandle_t domain,
+    nvtxMemPermissionsHandle_t permissionsHandle); /* only supported on objects from nvtxMemPermissionsCreate */
+/** \brief Reset the permissions object back to its created state. */
+NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsReset(
+    nvtxDomainHandle_t domain,
+    nvtxMemPermissionsHandle_t permissionsHandle);
+/* NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE and other special handles are supported */
+#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_NONE 0x0
+ /** \brief Upon binding, with the thread, exclude parent scope write regions instead of overlaying on top of them.
+  *
+  * EX A developer may choose to first prevent all writes except the ones specified to avoid
+  * OOB writes, since there are typically fewer regions written to than read from.
+ **/
+#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_WRITE 0x2
+ /** \brief Upon binding, with the thread, exclude parent scope read regions instead of overlaying on top of them.
+  *
+  * EX After eliminating any errors when applying strict writes, a developer may then choose to
+  * annotate and enforce strict reads behaviors in segments of code.
+ **/
+#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_READ 0x1
+ /** \brief Upon binding, with the thread, exclude parent scope atomic RW regions instead of overlaying on top of them.
+  *
+  * EX After eliminating any errors from read and write, a developer may choose to ensure
+  * that atomics are in their own region, removing standard read/write, and replacing with
+  * this strict atomic only access.  This way they know that conventional reads or writes
+  * will not cause unexpected issues.
+ **/
+#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_ATOMIC 0x4
+#define NVTX_MEM_PERMISSIONS_BIND_SCOPE_UNKNOWN 0x0
+ /** \brief Bind to thread scope.  In this case, tools should validate that local thread's
+  * execution is honoring the permissions as well as the state of NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE
+  * at the time of binding.  If this is not bound then NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE should be
+  * used to validate the memory.
+  *
+  * Not all tools will support every scope, such as a GPU sanitizer.
+ **/
+#define NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD 0x1
+/**
+ * \brief Bind to CUDA stream scope.
+ *
+ * In this case, the tool should validate that work enqueued to a CUDA stream,
+ * when it executes, respects the permissions object as it was at the point
+ * of binding, as well as the appropriate nvtxMemCudaGetDevicePermissions at the
+ * time of binding. If this is not bound then nvtxMemCudaGetDevicePermissions at
+ * the time of stream enqueue should be used to validate the memory.
+ *
+ * This could apply to work done either on the GPU like a kernel launch or to
+ * CPU based callbacks like cudaStreamAddCallback if the tools supports it.
+ *
+ * Binding applies locally to a CPU thread so that if N CPU threads are enqueuing
+ * work to the same stream (like the default stream) there cannot be a race
+ * condition between thread binding vs launching their work. I.e., users should
+ * expect the permissions bound in the thread to be honored by the subsequent
+ * work (launches, copies, etc) invoked from the CPU thread until unbound.
+ */
+#define NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM 0x2
+/**
+ * \brief Bind the permissions object into a particular scope on the caller thread
+ *
+ * Permissions do not take effect until binding. Binding permissions is a thread local
+ * activity that overrides global behaviors.  This is to avoid multi-threaded race conditions.
+ *
+ * The scope dictates what type of processing it applies to, and when in some cases.
+ * EX1: NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD applies to CPU code accessing memory while bound.
+ * EX2: NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM applies to CUDA streams, and the permissions
+ * must be recorded and applied when the work in the stream is dequeued to execute.  In this case
+ * it could be GPU or CPU, if the tool supports both.
+ *
+ * Bind can be called again on the same object and thread to take any updates to the
+ * specified permission object or the inherited properties.
+ *
+ * Bind flags support changing how the binding process inherits region access control.
+ * In the case of thread scope this is NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE and from CUDA_STREAM
+ * this is nvtxMemCudaGetDevicePermissions.  Choosing stricter modes allows the user to
+ * further reduce the access with less work, since memory, by default, behaves as natural
+ * until the NVTX annotations instruct a tool to treat it another way.  See strict flags
+ * for more details.
+ *
+ * Also see nvtxMemPermissionsUnbind
+ *
+ * \param domain      - NVTX domain handle (nvtxDomainHandle_t).
+ * \param permissions - The permissions object to bind. Special objects like
+ *                      NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE are not supported.
+ * \param bindScope   - One of NVTX_MEM_PERMISSIONS_BIND_SCOPE_*.
+ * \param bindFlags   - NVTX_MEM_PERMISSIONS_BIND_FLAGS_* value(s).
+ */
+NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsBind(
+    nvtxDomainHandle_t domain,
+    nvtxMemPermissionsHandle_t permissions, /* special object like NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE are not supported */
+    uint32_t bindScope, /* NVTX_MEM_PERMISSIONS_BIND_SCOPE_* */
+    uint32_t bindFlags); /* NVTX_MEM_PERMISSIONS_BIND_FLAGS_* */
+/**
+ * \brief Unbind the permissions object bound to the caller thread.
+ *
+ * Upon unbind, the thread local permissions for a scope are restored to the default
+ * behavior defined by the scope.
+ *
+ * Also see nvtxMemPermissionsBind
+ */
+NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsUnbind(
+    nvtxDomainHandle_t domain,
+    uint32_t bindScope); /* NVTX_MEM_PERMISSIONS_BIND_SCOPE_* */
+/** @} */
+#endif /* NVTX_MEM_CONTENTS_V1 */
+#ifndef NVTX_MEM_CALLBACK_ID_V1
+#define NVTX_MEM_CALLBACK_ID_V1
+/*
+ * Callback IDs of the memory extension: one consecutive NVTX3EXT_CBID_* value
+ * (0-16) per API function of the same name.
+ * NOTE(review): presumably used as slot indices by the nvtxDetail implementation
+ * headers, so existing values must not be renumbered -- confirm.
+ */
+#define NVTX3EXT_CBID_nvtxMemHeapRegister                  0
+#define NVTX3EXT_CBID_nvtxMemHeapUnregister                1
+#define NVTX3EXT_CBID_nvtxMemHeapReset                     2
+#define NVTX3EXT_CBID_nvtxMemRegionsRegister               3
+#define NVTX3EXT_CBID_nvtxMemRegionsResize                 4
+#define NVTX3EXT_CBID_nvtxMemRegionsUnregister             5
+#define NVTX3EXT_CBID_nvtxMemRegionsName                   6
+#define NVTX3EXT_CBID_nvtxMemPermissionsAssign             7
+#define NVTX3EXT_CBID_nvtxMemPermissionsCreate             8
+#define NVTX3EXT_CBID_nvtxMemPermissionsDestroy            9
+#define NVTX3EXT_CBID_nvtxMemPermissionsReset              10
+#define NVTX3EXT_CBID_nvtxMemPermissionsBind               11
+#define NVTX3EXT_CBID_nvtxMemPermissionsUnbind             12
+/* 13-16 in nvtxExtImplMemCudaRt_v1.h */
+#define NVTX3EXT_CBID_nvtxMemCudaGetProcessWidePermissions 13
+#define NVTX3EXT_CBID_nvtxMemCudaGetDeviceWidePermissions  14
+#define NVTX3EXT_CBID_nvtxMemCudaSetPeerAccess             15
+#define NVTX3EXT_CBID_nvtxMemCudaMarkInitialized           16
+#endif /* NVTX_MEM_CALLBACK_ID_V1 */
+/*
+ * Macros to create versioned symbols.
+ *
+ * NVTX_EXT_MEM_VERSIONED_ID(NAME) yields NAME_v<NVTX_VERSION>_mem<NVTX_EXT_COMPATID_MEM>.
+ * The _L2 level exists only to force macro expansion of NVTX_VERSION and
+ * NVTX_EXT_COMPATID_MEM: operands of the ## operator are not expanded, so the
+ * pasting itself has to happen one level further down, in _L3.
+ */
+#ifndef NVTX_EXT_MEM_VERSIONED_IDENTIFIERS_V1
+#define NVTX_EXT_MEM_VERSIONED_IDENTIFIERS_V1
+#define NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \
+    NAME##_v##VERSION##_mem##COMPATID
+#define NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \
+    NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID)
+#define NVTX_EXT_MEM_VERSIONED_ID(NAME) \
+    NVTX_EXT_MEM_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_COMPATID_MEM)
+#endif /* NVTX_EXT_MEM_VERSIONED_IDENTIFIERS_V1 */
+#ifdef __GNUC__
+#pragma GCC visibility push(internal)
+#endif
+/* Extension types are required for the implementation and the NVTX handler. */
+#define NVTX_EXT_TYPES_GUARD /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxExtTypes.h"
+#undef NVTX_EXT_TYPES_GUARD
+#ifndef NVTX_NO_IMPL
+/* Ensure other headers cannot be included directly */
+#define NVTX_EXT_IMPL_MEM_GUARD
+#include "nvtxDetail/nvtxExtImplMem_v1.h"
+#undef NVTX_EXT_IMPL_MEM_GUARD
+#endif /*NVTX_NO_IMPL*/
+#ifdef __GNUC__
+#pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtMemCudaRt.h ADDED Viewed

	@@ -0,0 +1,217 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExtMem.h"
+#include "cuda.h"
+#include "cuda_runtime.h"
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifndef NVTX_MEM_CUDART_CONTENTS_V1
+#define NVTX_MEM_CUDART_CONTENTS_V1
+/** \defgroup MEMORY_CUDART Memory CUDA Runtime
+ * See page \ref PAGE_MEMORY_CUDART.
+ * @{
+ */
+/** \brief The memory is from a CUDA runtime array.
+ *
+ * Relevant functions: cudaMallocArray,  cudaMalloc3DArray
+ * Also cudaArray_t from other types such as cudaMipmappedArray_t
+ *
+ * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported
+ *
+ * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t because the description can be retrieved by tools through cudaArrayGetInfo()
+ * nvtxMemRegionRegisterEx receives a regionDesc of type nvtxMemCudaArrayRangeDesc_t
+ */
+#define NVTX_MEM_TYPE_CUDA_ARRAY 0x11
+/** \brief Structure to describe a range of memory inside a CUDA runtime array object (cudaArray_t).
+ */
+typedef struct nvtxMemCudaArrayRangeDesc_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t reserved0;
+    cudaArray_t  src; /* The CUDA runtime array being described. */
+    size_t offset[3]; /* NOTE(review): presumably the per-dimension start of the range within src -- confirm order and units */
+    size_t extent[3]; /* NOTE(review): presumably the per-dimension size of the range -- confirm order and units */
+} nvtxMemCudaArrayRangeDesc_v1;
+typedef nvtxMemCudaArrayRangeDesc_v1 nvtxMemCudaArrayRangeDesc_t;
+/** \brief The memory is from a CUDA device array.
+ *
+ * Relevant functions: cuArrayCreate,  cuArray3DCreate
+ * Also CUarray from other types such as CUmipmappedArray
+ *
+ * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported
+ *
+ * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t because the description can be retrieved by tools through cudaArrayGetInfo()
+ * NOTE(review): the line above looks copied from NVTX_MEM_TYPE_CUDA_ARRAY; since the
+ * range descriptor below holds a CUarray, presumably the heapDesc is a CUarray whose
+ * description is retrieved through cuArrayGetDescriptor()/cuArray3DGetDescriptor() -- confirm.
+ * nvtxMemRegionRegisterEx receives a regionDesc of type nvtxMemCuArrayRangeDesc_t
+ */
+#define NVTX_MEM_TYPE_CU_ARRAY 0x12
+/** \brief Structure to describe a range of memory inside a CUDA driver API array object (CUarray).
+ */
+typedef struct nvtxMemCuArrayRangeDesc_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t reserved0;
+    CUarray  src; /* The CUDA driver API array being described. */
+    size_t offset[3]; /* NOTE(review): presumably the per-dimension start of the range within src -- confirm order and units */
+    size_t extent[3]; /* NOTE(review): presumably the per-dimension size of the range -- confirm order and units */
+} nvtxMemCuArrayRangeDesc_v1;
+typedef nvtxMemCuArrayRangeDesc_v1 nvtxMemCuArrayRangeDesc_t;
+/* Reserving 0x2-0xF for more common types */
+/** \brief Value accepted as `devicePeer` by nvtxMemCudaSetPeerAccess in place of a
+ * single device number.
+ *
+ * Parenthesized so that the negative literal remains a single operand wherever the
+ * macro is expanded.
+ */
+#define NVTX_MEM_CUDA_PEER_ALL_DEVICES (-1)
+/** \brief Get the permission object that represents the CUDA runtime device
+ * or CUDA driver context
+ *
+ * NOTE(review): this description is word-for-word the same as the one on
+ * nvtxMemCudaGetDeviceWidePermissions, although this function takes no device
+ * argument; presumably it should describe a process-wide permissions object -- confirm.
+ *
+ * This object will allow developers to adjust permissions applied to work executed
+ * on the GPU.  It may be inherited or overridden by a permissions object bound
+ * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags.
+ *
+ * Ex. change the peer to peer access permissions between devices in entirety
+ * or punch through special holes
+ *
+ * By default, all memory is accessible that naturally would be to a CUDA kernel until
+ * modified otherwise by nvtxMemCudaSetPeerAccess or changing regions.
+ *
+ * This object should also represent the CUDA driver API level context.
+*/
+NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetProcessWidePermissions(
+    nvtxDomainHandle_t domain);
+/** \brief Get the permission object that represents the CUDA runtime device
+ * or CUDA driver context
+ *
+ * This object will allow developers to adjust permissions applied to work executed
+ * on the GPU.  It may be inherited or overridden by a permissions object bound
+ * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags.
+ *
+ * Ex. change the peer to peer access permissions between devices in entirety
+ * or punch through special holes
+ *
+ * By default, all memory is accessible that naturally would be to a CUDA kernel until
+ * modified otherwise by nvtxMemCudaSetPeerAccess or changing regions.
+ *
+ * This object should also represent the CUDA driver API level context.
+*/
+NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetDeviceWidePermissions(
+    nvtxDomainHandle_t domain,
+    int device); /* NOTE(review): presumably a device number such as from cudaGetDevice() -- confirm */
+/** \brief Change the default behavior for all memory mapped in from a particular device.
+ *
+ * While typically all memory defaults to readable and writable, users may desire to limit
+ * access to reduced default permissions, such as read-only, on a per-device basis.
+ *
+ * Regions can be used to further override smaller windows of memory.
+ *
+ * devicePeer can be NVTX_MEM_CUDA_PEER_ALL_DEVICES
+ *
+*/
+NVTX_DECLSPEC void NVTX_API nvtxMemCudaSetPeerAccess(
+    nvtxDomainHandle_t domain,
+    nvtxMemPermissionsHandle_t permissions,
+    int devicePeer, /* device number such as from cudaGetDevice() or NVTX_MEM_CUDA_PEER_ALL_DEVICES */
+    uint32_t flags); /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */
+/** \brief Mark memory ranges as initialized.
+*
+* NOTE(review): the paragraphs below mention a heap, `regionDescArray`, `regionCount`
+* and `regionHandleArrayOut`, none of which are members of this structure (its members
+* are `regionType`, `regionDescCount`, `regionDescElementSize` and `regionDescElements`).
+* The text looks inherited from a region-registration batch description -- confirm
+* which parts still apply.
+*
+* The heap refers to the heap within which the region resides.
+* This can be from nvtxMemHeapRegister, NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE, or one provided from other extension API.
+*
+* The regionType arg will define which type is used in regionDescArray.
+* The most commonly used type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS.
+*
+* The regionCount arg is how many elements are in regionDescArray and regionHandleArrayOut.
+*
+* The regionHandleArrayOut arg points to an array where the tool will provide region handles.
+* If a pointer is provided, it is expected to have regionCount elements.
+* This pointer can be NULL if regionType is NVTX_MEM_TYPE_VIRTUAL_ADDRESS.  In this case,
+* the user can use the pointer to the virtual memory to reference the region in other
+* related functions which accept a nvtxMemRegionRef_t.
+*/
+typedef struct nvtxMemMarkInitializedBatch_v1
+{
+    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
+    uint16_t structSize; /* Size of the structure. */
+    uint32_t regionType; /* NVTX_MEM_TYPE_* */
+    size_t regionDescCount;
+    size_t regionDescElementSize;
+    void const* regionDescElements; /* this will also become the handle for this region */
+} nvtxMemMarkInitializedBatch_v1;
+typedef nvtxMemMarkInitializedBatch_v1 nvtxMemMarkInitializedBatch_t;
+/** \brief Mark the memory ranges described by desc as initialized.
+*
+* NOTE(review): the previous brief ("Register a region of memory inside of a heap of
+* linear process virtual memory") appears to have been copied from a region
+* registration function; the wording above follows the function name and the brief
+* of nvtxMemMarkInitializedBatch_t -- confirm.
+*
+* stream is the CUDA stream where the range was accessed and initialized.
+*/
+NVTX_DECLSPEC void NVTX_API nvtxMemCudaMarkInitialized(
+    nvtxDomainHandle_t domain,
+    cudaStream_t stream,
+    uint8_t isPerThreadStream, /* 0 for false, otherwise true */
+    nvtxMemMarkInitializedBatch_t const* desc);
+/** @} */
+#endif /* NVTX_MEM_CUDART_CONTENTS_V1 */
+#ifdef __GNUC__
+#pragma GCC visibility push(internal)
+#endif
+#ifndef NVTX_NO_IMPL
+#define NVTX_EXT_IMPL_MEM_CUDART_GUARD /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxExtImplMemCudaRt_v1.h"
+#undef NVTX_EXT_IMPL_MEM_CUDART_GUARD
+#endif /*NVTX_NO_IMPL*/
+#ifdef __GNUC__
+#pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtOpenCL.h ADDED Viewed

	@@ -0,0 +1,213 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExt.h"
+#include <CL/cl.h>
+#ifndef NVTOOLSEXT_OPENCL_V3
+#define NVTOOLSEXT_OPENCL_V3
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* ========================================================================= */
+/** \name Functions for OpenCL Resource Naming
+ */
+/** \addtogroup RESOURCE_NAMING
+ * \section RESOURCE_NAMING_OPENCL OpenCL Resource Naming
+ *
+ * This section covers the API functions that allow annotating OpenCL resources
+ * with user-provided names.
+ *
+ * @{
+ */
+/*  ------------------------------------------------------------------------- */
+/** \cond SHOW_HIDDEN
+* \brief Used to build a non-colliding value for resource types separated by class
+* \version NVTX_VERSION_2
+*/
+#define NVTX_RESOURCE_CLASS_OPENCL 6
+/** \endcond */
+/*  ------------------------------------------------------------------------- */
+/** \brief Resource types for OpenCL
+*
+* Each value is built with NVTX_RESOURCE_MAKE_TYPE from the OPENCL resource class
+* and an index from 1 to 7.
+*/
+typedef enum nvtxResourceOpenCLType_t
+{
+    NVTX_RESOURCE_TYPE_OPENCL_DEVICE       = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 1),
+    NVTX_RESOURCE_TYPE_OPENCL_CONTEXT      = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 2),
+    NVTX_RESOURCE_TYPE_OPENCL_COMMANDQUEUE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 3),
+    NVTX_RESOURCE_TYPE_OPENCL_MEMOBJECT    = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 4),
+    NVTX_RESOURCE_TYPE_OPENCL_SAMPLER      = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 5),
+    NVTX_RESOURCE_TYPE_OPENCL_PROGRAM      = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 6),
+    NVTX_RESOURCE_TYPE_OPENCL_EVENT        = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 7)
+} nvtxResourceOpenCLType_t;
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL device.
+ *
+ * Associates an OpenCL device with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param device - The handle of the OpenCL device to name.
+ * \param name   - The name of the OpenCL device.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL context.
+ *
+ * Associates an OpenCL context with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param context - The handle of the OpenCL context to name.
+ * \param name    - The name of the OpenCL context.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL command queue.
+ *
+ * Associates an OpenCL command queue with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param command_queue - The handle of the OpenCL command queue to name.
+ * \param name          - The name of the OpenCL command queue.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL memory object.
+ *
+ * Associates an OpenCL memory object with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param memobj - The handle of the OpenCL memory object to name.
+ * \param name   - The name of the OpenCL memory object.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL sampler.
+ *
+ * Associates an OpenCL sampler with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param sampler - The handle of the OpenCL sampler to name.
+ * \param name    - The name of the OpenCL sampler.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL program.
+ *
+ * Associates an OpenCL program with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param program - The handle of the OpenCL program to name.
+ * \param name    - The name of the OpenCL program.
+ *
+ * The example calls the `W` variant explicitly because it passes a wide string
+ * literal; the generic nvtxNameClProgram macro only maps to the `W` variant when
+ * UNICODE is defined.
+ *
+ * \code
+ * cpProgram = clCreateProgramWithSource(cxGPUContext, 1,
+ *     (const char **) &cSourceCL, &program_length, &ciErrNum);
+ * shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
+ * nvtxNameClProgramW(cpProgram, L"PROGRAM_NAME");
+ * \endcode
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name);
+/** @} */
+/* ------------------------------------------------------------------------- */
+/** \brief Annotates an OpenCL event.
+ *
+ * Associates an OpenCL event with a user-provided name.
+ * The `A` variant takes a `char` string, the `W` variant a `wchar_t` string.
+ *
+ * \param evnt - The handle of the OpenCL event to name.
+ * \param name - The name of the OpenCL event.
+ *
+ * \version NVTX_VERSION_1
+ * @{ */
+NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name);
+NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name);
+/** @} */
+/** @} */ /* END RESOURCE_NAMING */
+/* ========================================================================= */
+/*
+ * Character-set neutral aliases: each nvtxNameCl* name maps to the wide-character
+ * (`W`) function when UNICODE is defined and to the `char` (`A`) function otherwise.
+ */
+#ifdef UNICODE
+  #define nvtxNameClDevice        nvtxNameClDeviceW
+  #define nvtxNameClContext       nvtxNameClContextW
+  #define nvtxNameClCommandQueue  nvtxNameClCommandQueueW
+  #define nvtxNameClMemObject     nvtxNameClMemObjectW
+  #define nvtxNameClSampler       nvtxNameClSamplerW
+  #define nvtxNameClProgram       nvtxNameClProgramW
+  #define nvtxNameClEvent         nvtxNameClEventW
+#else
+  #define nvtxNameClDevice        nvtxNameClDeviceA
+  #define nvtxNameClContext       nvtxNameClContextA
+  #define nvtxNameClCommandQueue  nvtxNameClCommandQueueA
+  #define nvtxNameClMemObject     nvtxNameClMemObjectA
+  #define nvtxNameClSampler       nvtxNameClSamplerA
+  #define nvtxNameClProgram       nvtxNameClProgramA
+  #define nvtxNameClEvent         nvtxNameClEventA
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#ifndef NVTX_NO_IMPL
+#define NVTX_IMPL_GUARD_OPENCL /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxImplOpenCL_v3.h"
+#undef NVTX_IMPL_GUARD_OPENCL
+#endif /*NVTX_NO_IMPL*/
+#endif /* NVTOOLSEXT_OPENCL_V3 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayload.h ADDED Viewed

	@@ -0,0 +1,1478 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExt.h"
+/* Optionally include helper macros. */
+/* #include "nvToolsExtPayloadHelper.h" */
+/**
+ * If needed, semantic extension headers can be included after this header.
+ */
+/**
+ * \brief The compatibility ID is used for versioning of this extension.
+ */
+#ifndef NVTX_EXT_PAYLOAD_COMPATID
+#define NVTX_EXT_PAYLOAD_COMPATID 0x0104
+#endif
+/**
+ * \brief The module ID identifies the payload extension. It has to be unique
+ * among the extension modules.
+ */
+#ifndef NVTX_EXT_PAYLOAD_MODULEID
+#define NVTX_EXT_PAYLOAD_MODULEID 2
+#endif
+/**
+ * \brief Additional value for the enum @ref nvtxPayloadType_t
+ */
+#ifndef NVTX_PAYLOAD_TYPE_EXT
+#define NVTX_PAYLOAD_TYPE_EXT (NVTX_STATIC_CAST(int32_t, 0xDFBD0009))
+#endif
+/** ---------------------------------------------------------------------------
+ * Payload schema entry flags. Used for @ref nvtxPayloadSchemaEntry_t::flags.
+ * ------------------------------------------------------------------------- */
+#ifndef NVTX_PAYLOAD_ENTRY_FLAGS_V1
+#define NVTX_PAYLOAD_ENTRY_FLAGS_V1
+#define NVTX_PAYLOAD_ENTRY_FLAG_UNUSED 0
+/**
+ * Absolute pointer into a payload (entry) of the same event.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_POINTER          (1 << 1)
+/**
+ * Offset from base address of the payload.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_BASE (1 << 2)
+/**
+ * Offset from the end of this payload entry.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_HERE (1 << 3)
+/**
+ * The value is an array with fixed length, set with the field `arrayLength`.
+ *
+ * Note: the `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_*` values are the numbers 1 to 4
+ * shifted left by 4, not independent bits. For example, OR-ing
+ * `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE` with
+ * `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED` yields the value of
+ * `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX`, so set at most one of them.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE           (1 << 4)
+/**
+ * The value is a zero-/null-terminated array.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED      (2 << 4)
+/**
+ * \brief A single or multi-dimensional array of variable length.
+ *
+ * The field `arrayOrUnionDetail` contains the index of the schema entry that
+ * holds the length(s). If the length entry is a scalar, then this entry is a 1D
+ * array. If the length entry is a fixed-size array, then the number of
+ * dimensions is defined with the registration of the schema. If the length
+ * entry is a zero-terminated array, then the array of the dimensions can be
+ * determined at runtime.
+ * For multidimensional arrays, values are stored in row-major order, with rows
+ * being stored consecutively in contiguous memory. The size of the entry (in
+ * bytes) is the product of the dimensions multiplied by the size of the array
+ * element.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX         (3 << 4)
+/**
+ * \brief A single or multi-dimensional array of variable length, where the
+ * dimensions are stored in a different payload (index) of the same event.
+ *
+ * This enables an existing address to an array to be directly passed, while the
+ * dimensions are defined in a separate payload (with only one payload entry).
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_PAYLOAD_INDEX (4 << 4)
+/**
+ * \brief The value or data that is pointed to by this payload entry value shall
+ * be copied by the NVTX handler.
+ *
+ * A tool may not support deep copy and just ignore this flag.
+ * See @ref NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY for more details.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_DEEP_COPY          (1 << 8)
+/**
+ * Notifies the NVTX handler to hide this entry in case of visualization.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_HIDE               (1 << 9)
+/**
+ * The entry specifies the event message. Any string type can be used.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE      (1 << 10)
+/**
+ * \brief The entry contains a timestamp.
+ *
+ * The time source might be provided via the entry semantics field. In most
+ * cases, the timestamp (entry) type is @ref NVTX_PAYLOAD_ENTRY_TYPE_INT64.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_TIMESTAMP          (2 << 10)
+/**
+ * These flags specify the NVTX event type to which an entry refers.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_RANGE_BEGIN        (1 << 12)
+#define NVTX_PAYLOAD_ENTRY_FLAG_RANGE_END          (2 << 12)
+#define NVTX_PAYLOAD_ENTRY_FLAG_MARK               (3 << 12)
+#define NVTX_PAYLOAD_ENTRY_FLAG_COUNTER            (4 << 12)
+#endif /* NVTX_PAYLOAD_ENTRY_FLAGS_V1 */
+/** ---------------------------------------------------------------------------
+ * END: Payload schema entry flags.
+ * ------------------------------------------------------------------------- */
+/**
+ * @note The 'array' flags assume that the array is embedded. Otherwise,
+ * @ref NVTX_PAYLOAD_ENTRY_FLAG_POINTER has to be additionally specified. Some
+ * combinations may be invalid depending on the `NVTX_PAYLOAD_SCHEMA_TYPE_*` of
+ * the schema in which this entry is enclosed. For instance, variable length
+ * embedded arrays are valid within @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC but
+ * invalid with @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC. See
+ * `NVTX_PAYLOAD_SCHEMA_TYPE_*` for additional details.
+ */
+/*
+ * Helper macros to check if an entry represents an array.
+ *
+ * NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY is the mask of all bits used by the
+ * NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_* values. It has to include
+ * NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_PAYLOAD_INDEX (4 << 4): that value uses a
+ * bit none of the other three array values cover, so without it such entries
+ * would be reported as "not an array".
+ *
+ * NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_TYPE(F) extracts the array kind from the entry
+ * flags F: the result is 0 for a non-array entry, otherwise it compares equal
+ * to one of the NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_* values.
+ */
+#define NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY (\
+    NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE | \
+    NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED | \
+    NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX | \
+    NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_PAYLOAD_INDEX)
+#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_TYPE(F) \
+    ((F) & NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY)
+/** ---------------------------------------------------------------------------
+ * Types of entries in a payload schema.
+ *
+ * @note Several of the predefined types contain the size (in bits) in their
+ * names. For some data types the size (in bytes) is not fixed and may differ
+ * for different platforms/operating systems/compilers. To provide portability,
+ * an array of sizes (in bytes) for type 1 to 28 ( @ref
+ * NVTX_PAYLOAD_ENTRY_TYPE_CHAR to @ref NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE)
+ * is passed to the NVTX extension initialization function
+ * @ref InitializeInjectionNvtxExtension via the `extInfo` field of
+ * @ref nvtxExtModuleInfo_t.
+ * ------------------------------------------------------------------------- */
+#ifndef NVTX_PAYLOAD_ENTRY_TYPES_V1
+#define NVTX_PAYLOAD_ENTRY_TYPES_V1
+#define NVTX_PAYLOAD_ENTRY_TYPE_INVALID     0
+/**
+ * Basic integer types.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR        1
+#define NVTX_PAYLOAD_ENTRY_TYPE_UCHAR       2
+#define NVTX_PAYLOAD_ENTRY_TYPE_SHORT       3
+#define NVTX_PAYLOAD_ENTRY_TYPE_USHORT      4
+#define NVTX_PAYLOAD_ENTRY_TYPE_INT         5
+#define NVTX_PAYLOAD_ENTRY_TYPE_UINT        6
+#define NVTX_PAYLOAD_ENTRY_TYPE_LONG        7
+#define NVTX_PAYLOAD_ENTRY_TYPE_ULONG       8
+#define NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG    9
+#define NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG  10
+/**
+ * Integer types with explicit size.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_INT8       11
+#define NVTX_PAYLOAD_ENTRY_TYPE_UINT8      12
+#define NVTX_PAYLOAD_ENTRY_TYPE_INT16      13
+#define NVTX_PAYLOAD_ENTRY_TYPE_UINT16     14
+#define NVTX_PAYLOAD_ENTRY_TYPE_INT32      15
+#define NVTX_PAYLOAD_ENTRY_TYPE_UINT32     16
+#define NVTX_PAYLOAD_ENTRY_TYPE_INT64      17
+#define NVTX_PAYLOAD_ENTRY_TYPE_UINT64     18
+/**
+ * Floating point types
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT      19
+#define NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE     20
+#define NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE 21
+/**
+ * Size type (`size_t` in C).
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_SIZE       22
+/**
+ * Any address, e.g. `void*`. If the pointer type matters, use the flag @ref
+ * NVTX_PAYLOAD_ENTRY_FLAG_POINTER and the respective type instead.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS    23
+/**
+ * Special character types.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_WCHAR      24 /* wide character (since C90) */
+#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR8      25 /* since C2x and C++20 */
+#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR16     26
+#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR32     27
+/**
+ * There is type size and alignment information for all previous types.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE (NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 + 1)
+/**
+ * Store raw 8-bit binary data. As with `char`, 1-byte alignment is assumed.
+ * Typically, a tool will display this as hex or binary.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_BYTE       32
+/**
+ * These types do not have standardized equivalents. It is assumed that the
+ * number at the end corresponds to the bits used to store the value and that
+ * the alignment corresponds to standardized types of the same size.
+ * A tool may not support these types.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_INT128     33
+#define NVTX_PAYLOAD_ENTRY_TYPE_UINT128    34
+#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT16    42
+#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT32    43
+#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT64    44
+#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT128   45
+#define NVTX_PAYLOAD_ENTRY_TYPE_BF16       50
+#define NVTX_PAYLOAD_ENTRY_TYPE_TF32       52
+/**
+ * Data types are as defined by NVTXv3 core.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_CATEGORY   68 /* uint32_t */
+#define NVTX_PAYLOAD_ENTRY_TYPE_COLOR_ARGB 69 /* uint32_t */
+/**
+ * The scope of events or counters (see `nvtxScopeRegister`).
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_SCOPE_ID   70 /* uint64_t */
+/**
+ * Process ID as scope.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_PID_UINT32 71
+#define NVTX_PAYLOAD_ENTRY_TYPE_PID_UINT64 72
+/**
+ * Thread ID as scope.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_TID_UINT32 73
+#define NVTX_PAYLOAD_ENTRY_TYPE_TID_UINT64 74
+/**
+ * \brief String types.
+ *
+ * If no flags are set for the entry and `arrayOrUnionDetail > 0`, the entry is
+ * assumed to be a fixed-size string with the given length, embedded in the payload.
+ * `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE` is redundant for fixed-size strings.
+ *
+ * Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED` specifies a
+ * zero-terminated string. If `arrayOrUnionDetail > 0`, the entry is handled as
+ * a zero-terminated array of fixed-size strings.
+ *
+ * Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX` specifies a
+ * variable-length string with the length given in the entry specified by the
+ * field `arrayOrUnionDetail`.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING       75 /* `char*`, system LOCALE */
+#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF8  76
+#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF16 77
+#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF32 78
+/**
+ * The entry value is of type @ref nvtxStringHandle_t returned by
+ * @ref nvtxDomainRegisterString.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE 80
+/**
+ * This type marks the union selector member (entry index) in schemas used by
+ * a union with internal selector.
+ * See @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR.
+ */
+#define NVTX_PAYLOAD_ENTRY_TYPE_UNION_SELECTOR 100
+/**
+ * \brief Predefined schema ID for payload data that is referenced in another payload.
+ *
+ * This schema ID can be used in @ref nvtxPayloadData_t::schema_id to indicate that the
+ * payload is a blob of memory which other payload entries may point into.
+ * A tool will not expose this payload directly.
+ *
+ * This schema ID cannot be used as schema entry type!
+ */
+#define NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED 1022
+/**
+ * \brief Predefined schema ID for raw payload data.
+ *
+ * This schema ID can be used in @ref nvtxPayloadData_t::schemaId to indicate
+ * that the payload is a blob, which can be shown with an arbitrary data viewer.
+ * This schema ID cannot be used as schema entry type!
+ */
+#define NVTX_TYPE_PAYLOAD_SCHEMA_RAW        1023
+/* Custom (static) schema IDs. */
+#define NVTX_PAYLOAD_SCHEMA_ID_STATIC_START  (1 << 24)
+/* Dynamic schema IDs (generated by the tool) start here. */
+#define NVTX_PAYLOAD_SCHEMA_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
+#endif /* NVTX_PAYLOAD_ENTRY_TYPES_V1 */
+/** ---------------------------------------------------------------------------
+ * END: Payload schema entry types.
+ * ------------------------------------------------------------------------- */
+#ifndef NVTX_PAYLOAD_SCHEMA_TYPES_V1
+#define NVTX_PAYLOAD_SCHEMA_TYPES_V1
+/**
+ * \brief The payload schema type.
+ *
+ * A schema has one of the following types. It is set with
+ * @ref nvtxPayloadSchemaAttr_t::type.
+ *
+ * - `STATIC`: the binary payload size is fixed; it can be given via
+ *   `nvtxPayloadSchemaAttr_t::payloadStaticSize` or determined by a tool.
+ * - `DYNAMIC`: `nvtxPayloadSchemaAttr_t::payloadStaticSize` is ignored.
+ * - `UNION`: the union member is selected externally, via the field
+ *   `arrayOrUnionDetail` of the schema entry that uses the union.
+ * - `UNION_WITH_INTERNAL_SELECTOR`: the selector is a member of the union
+ *   schema itself, marked with @ref NVTX_PAYLOAD_ENTRY_TYPE_UNION_SELECTOR.
+ */
+#define NVTX_PAYLOAD_SCHEMA_TYPE_INVALID                      0
+#define NVTX_PAYLOAD_SCHEMA_TYPE_STATIC                       1
+#define NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC                      2
+#define NVTX_PAYLOAD_SCHEMA_TYPE_UNION                        3
+#define NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR 4
+#endif /* NVTX_PAYLOAD_SCHEMA_TYPES_V1 */
+#ifndef NVTX_PAYLOAD_SCHEMA_FLAGS_V1
+#define NVTX_PAYLOAD_SCHEMA_FLAGS_V1
+/**
+ * \brief Flags for static and dynamic schemas.
+ *
+ * The schema flags are used with @ref nvtxPayloadSchemaAttr_t::flags.
+ */
+#define NVTX_PAYLOAD_SCHEMA_FLAG_NONE           0
+/**
+ * This flag indicates that a schema and the corresponding payloads can
+ * contain fields which require a deep copy.
+ */
+#define NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY      (1 << 1)
+/**
+ * This flag indicates that a schema and the corresponding payload can be
+ * referenced by another payload of the same event. If the schema is not
+ * intended to be visualized directly, it is possible to use
+ * @ref NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED instead.
+ */
+#define NVTX_PAYLOAD_SCHEMA_FLAG_REFERENCED     (1 << 2)
+/**
+ * The schema defines a counter group. An NVTX handler can expect that the schema
+ * contains entries with counter semantics.
+ *
+ * Note: the values shifted by 3 (`COUNTER_GROUP`, `RANGE_PUSHPOP`,
+ * `RANGE_STARTEND`, `MARK`) are consecutive numbers stored from bit 3 upwards,
+ * not independent bits: `(1 << 3) | (2 << 3)` equals `(3 << 3)`. OR-ing two of
+ * them therefore does not yield a combination of both.
+ */
+#define NVTX_PAYLOAD_SCHEMA_FLAG_COUNTER_GROUP  (1 << 3)
+/**
+ * The schema defines a range or marker. An NVTX handler can expect that the
+ * schema contains a message and timestamp(s).
+ */
+#define NVTX_PAYLOAD_SCHEMA_FLAG_RANGE_PUSHPOP  (2 << 3)
+#define NVTX_PAYLOAD_SCHEMA_FLAG_RANGE_STARTEND (3 << 3)
+#define NVTX_PAYLOAD_SCHEMA_FLAG_MARK           (4 << 3)
+#endif /* NVTX_PAYLOAD_SCHEMA_FLAGS_V1 */
+#ifndef NVTX_PAYLOAD_SCHEMA_ATTR_FIELDS_V1
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELDS_V1
+/**
+ * Bit values for the field `fieldMask` of @ref nvtxPayloadSchemaAttr_t. The
+ * mask specifies which fields of the struct are valid (set by the caller).
+ * Note that the first value is `1 << 1`; bit 0 is not used by these defines.
+ */
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME        (1 << 1)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE        (1 << 2)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS       (1 << 3)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES     (1 << 4)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES (1 << 5)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_STATIC_SIZE (1 << 6)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ALIGNMENT   (1 << 7)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_SCHEMA_ID   (1 << 8)
+#define NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_EXTENSION   (1 << 9)
+#endif /* NVTX_PAYLOAD_SCHEMA_ATTR_FIELDS_V1 */
+#ifndef NVTX_PAYLOAD_ENUM_ATTR_FIELDS_V1
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELDS_V1
+/**
+ * Bit values for the field `fieldMask` of @ref nvtxPayloadEnumAttr_t. The
+ * mask specifies which fields of the struct are set.
+ * Note that the first value is `1 << 1`; bit 0 is not used by these defines.
+ */
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELD_NAME        (1 << 1)
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELD_ENTRIES     (1 << 2)
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELD_NUM_ENTRIES (1 << 3)
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELD_SIZE        (1 << 4)
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELD_SCHEMA_ID   (1 << 5)
+#define NVTX_PAYLOAD_ENUM_ATTR_FIELD_EXTENSION   (1 << 6)
+#endif /* NVTX_PAYLOAD_ENUM_ATTR_FIELDS_V1 */
+/**
+ * An NVTX scope specifies the execution scope or source of events or counters.
+ * A tool determines the value for a predefined scope when the sample is taken.
+ *
+ * Custom scopes are registered with `nvtxScopeRegister`. A static
+ * (user-provided) scope ID must be >= `NVTX_SCOPE_ID_STATIC_START` and
+ * < `NVTX_SCOPE_ID_DYNAMIC_START`.
+ */
+#ifndef NVTX_SCOPES_V1
+#define NVTX_SCOPES_V1
+#define NVTX_SCOPE_NONE                    0 /* No scope specified. */
+#define NVTX_SCOPE_ROOT                    1 /* The root in a hierarchy. */
+/* Hardware events */
+#define NVTX_SCOPE_CURRENT_HW_MACHINE      2 /* Node/machine name */
+#define NVTX_SCOPE_CURRENT_HW_SOCKET       3
+#define NVTX_SCOPE_CURRENT_HW_CPU_PHYSICAL 4 /* Physical CPU core */
+#define NVTX_SCOPE_CURRENT_HW_CPU_LOGICAL  5 /* Logical CPU core */
+/* Innermost HW execution context */
+#define NVTX_SCOPE_CURRENT_HW_INNERMOST   15
+/* Virtualized hardware, virtual machines */
+#define NVTX_SCOPE_CURRENT_HYPERVISOR     16
+#define NVTX_SCOPE_CURRENT_VM             17
+#define NVTX_SCOPE_CURRENT_KERNEL         18
+#define NVTX_SCOPE_CURRENT_CONTAINER      19
+#define NVTX_SCOPE_CURRENT_OS             20
+/* Software scopes */
+#define NVTX_SCOPE_CURRENT_SW_PROCESS     21 /* Process scope */
+#define NVTX_SCOPE_CURRENT_SW_THREAD      22 /* Thread scope */
+/* Innermost SW execution context */
+#define NVTX_SCOPE_CURRENT_SW_INNERMOST   31
+/** Static (user-provided, feed-forward) scope IDs start at this value. */
+#define NVTX_SCOPE_ID_STATIC_START  (1 << 24)
+/* Dynamically generated (by the tool) scope IDs start at this value. */
+#define NVTX_SCOPE_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
+#endif /* NVTX_SCOPES_V1 */
+#ifndef NVTX_TIME_V1
+#define NVTX_TIME_V1
+/**
+ * Timestamp source is not known, e.g. NIC or switch. The NVTX handler can
+ * assume that at least two synchronization points are created with NVTX
+ * instrumentation.
+ */
+#define NVTX_TIMESTAMP_TYPE_NONE  0
+/** The timestamp was provided by the NVTX handler via `nvtxTimestampGet()`. */
+#define NVTX_TIMESTAMP_TYPE_TOOL_PROVIDED  1
+/** CPU timestamp sources */
+#define NVTX_TIMESTAMP_TYPE_CPU_TSC  /* RDTSC on x86, CNTVCT on ARM */ 10
+#define NVTX_TIMESTAMP_TYPE_CPU_TSC_NONVIRTUALIZED /* CNTPCT on ARM */ 11
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_REALTIME                 12
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_REALTIME_COARSE          13
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_MONOTONIC                14
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_MONOTONIC_RAW            15
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_MONOTONIC_COARSE         16
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_BOOTTIME                 17
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_PROCESS_CPUTIME_ID       18
+#define NVTX_TIMESTAMP_TYPE_CPU_CLOCK_GETTIME_THREAD_CPUTIME_ID        19
+#define NVTX_TIMESTAMP_TYPE_WIN_QPC      30 /* presumably QueryPerformanceCounter; TODO confirm */
+#define NVTX_TIMESTAMP_TYPE_WIN_GSTAFT   31 /* presumably GetSystemTimeAsFileTime; TODO confirm */
+#define NVTX_TIMESTAMP_TYPE_WIN_GSTAFTP  32 /* presumably GetSystemTimePreciseAsFileTime; TODO confirm */
+#define NVTX_TIMESTAMP_TYPE_C_TIME          40
+#define NVTX_TIMESTAMP_TYPE_C_CLOCK         41
+#define NVTX_TIMESTAMP_TYPE_C_TIMESPEC_GET  42
+#define NVTX_TIMESTAMP_TYPE_CPP_STEADY_CLOCK           50
+#define NVTX_TIMESTAMP_TYPE_CPP_HIGH_RESOLUTION_CLOCK  51
+#define NVTX_TIMESTAMP_TYPE_CPP_SYSTEM_CLOCK           52
+#define NVTX_TIMESTAMP_TYPE_CPP_UTC_CLOCK              53
+#define NVTX_TIMESTAMP_TYPE_CPP_TAI_CLOCK              54
+#define NVTX_TIMESTAMP_TYPE_CPP_GPS_CLOCK              55
+#define NVTX_TIMESTAMP_TYPE_CPP_FILE_CLOCK             56
+/** GPU timestamp sources */
+#define NVTX_TIMESTAMP_TYPE_GPU_GLOBALTIMER  80 /* e.g. PTIMER */
+/** Returned by `nvtxTimeDomainRegister` if time domain registration failed. */
+#define NVTX_TIME_DOMAIN_ID_NONE 0
+/** Static (user-provided, feed-forward) time domain IDs start at this value. */
+#define NVTX_TIME_DOMAIN_ID_STATIC_START  (1 << 24)
+/* Dynamically generated (by the tool) time domain IDs start at this value. */
+#define NVTX_TIME_DOMAIN_ID_DYNAMIC_START (NVTX_STATIC_CAST(uint64_t, 1) << 32)
+/** Timer properties, used with `nvtxTimeDomainAttr_t::timerFlags`. */
+#define NVTX_TIMER_FLAG_NONE             0
+#define NVTX_TIMER_FLAG_CLOCK_MONOTONIC  (1 << 1)
+#define NVTX_TIMER_FLAG_CLOCK_STEADY     (1 << 2)
+/**
+ * Point in time when the timer starts (its value is 0), used with
+ * `nvtxTimeDomainAttr_t::timerStart`.
+ */
+#define NVTX_TIMER_START_UNKNOWN         0
+#define NVTX_TIMER_START_SYSTEM_BOOT     1
+#define NVTX_TIMER_START_VM_BOOT         2
+#define NVTX_TIMER_START_UNIX_EPOCH      3 /* 1 January 1970 */
+#define NVTX_TIMER_START_WIN_FILETIME    4 /* 1 January 1601 */
+/**
+ * Flags specifying whether it is safe or unsafe to call the timestamp
+ * provider after process teardown. See the `flags` parameter of
+ * `nvtxTimerSource` and `nvtxTimerSourceWithData`.
+ */
+#define NVTX_TIMER_SOURCE_SAFE_CALL_AFTER_PROCESS_TEARDOWN   0
+#define NVTX_TIMER_SOURCE_UNSAFE_CALL_AFTER_PROCESS_TEARDOWN 1
+#endif /* NVTX_TIME_V1 */
+#ifndef NVTX_BATCH_FLAGS_V1
+#define NVTX_BATCH_FLAGS_V1
+/**
+ * Timestamp ordering flags for a batch of deferred events or counters.
+ * By default, chronological order by the first timestamp of the event or
+ * counter is assumed.
+ *
+ * Note: the non-zero values are consecutive numbers stored from bit 1 upwards,
+ * not independent bits: `(1 << 1) | (2 << 1)` equals `(3 << 1)`, i.e.
+ * `NVTX_BATCH_FLAG_UNSORTED`.
+ */
+#define NVTX_BATCH_FLAG_TIME_SORTED            0
+#define NVTX_BATCH_FLAG_TIME_SORTED_PARTIALLY  (1 << 1)
+#define NVTX_BATCH_FLAG_TIME_SORTED_PER_SCOPE  (2 << 1)
+#define NVTX_BATCH_FLAG_UNSORTED               (3 << 1)
+#endif /* NVTX_BATCH_FLAGS_V1 */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifndef NVTX_PAYLOAD_TYPEDEFS_V1
+#define NVTX_PAYLOAD_TYPEDEFS_V1
+/**
+ * \brief Size and alignment information for predefined payload entry types.
+ *
+ * The struct contains the size and the alignment of a type, both in bytes. An
+ * array of this struct, covering the predefined types, is passed via
+ * nvtxExtModuleInfo_t to the NVTX client/handler. The type (ID) is used as
+ * index into this array.
+ */
+typedef struct nvtxPayloadEntryTypeInfo_v1
+{
+    uint16_t size;  /* Size of the type in bytes. */
+    uint16_t align; /* Alignment of the type in bytes. */
+} nvtxPayloadEntryTypeInfo_t;
+/**
+ * \brief Binary payload data, size and decoding information.
+ *
+ * An array of type `nvtxPayloadData_t` is attached to an NVTX event via the
+ * `payload.ullvalue` field of the NVTX event attributes.
+ *
+ * The `schemaId` can be a predefined schema entry type
+ * (`NVTX_PAYLOAD_ENTRY_TYPE_*`), a schema ID (statically specified or
+ * dynamically created) or one of `NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED` or
+ * `NVTX_TYPE_PAYLOAD_SCHEMA_RAW`.
+ *
+ * Setting the size of a payload to `SIZE_MAX` can be useful to reduce the
+ * overhead of NVTX instrumentation, when no NVTX handler is attached. However,
+ * a tool might not be able to detect the size of a payload and thus skip it.
+ * A reasonable use case is a payload that represents a null-terminated
+ * C string, where the NVTX handler can call `strlen()`.
+ */
+typedef struct nvtxPayloadData_v1
+{
+    /**
+     * The schema ID, which defines the layout of the binary data.
+     */
+    uint64_t    schemaId;
+    /**
+     * Size of the payload (blob) in bytes. `SIZE_MAX` (`-1`) indicates to the
+     * tool that it should determine the size itself, which might not be
+     * possible.
+     */
+    size_t      size;
+    /**
+     * Pointer to the binary payload data.
+     */
+    const void* payload;
+} nvtxPayloadData_t;
+/**
+ * \brief Header of the payload entry's semantic field.
+ *
+ * If the semantic field of the payload schema entry is set, the first four
+ * fields (header) are defined with this type. A tool can iterate through the
+ * linked list of extensions and check whether it supports (can handle) each
+ * of them.
+ */
+typedef struct nvtxSemanticsHeader_v1
+{
+    uint32_t structSize; /** Size of semantic extension struct. */
+    uint16_t semanticId; /* presumably identifies the semantic extension; TODO confirm */
+    uint16_t version;    /* presumably the version of that extension; TODO confirm */
+    const struct nvtxSemanticsHeader_v1* next; /** linked list */
+    /* Additional fields are defined by the specific semantic extension. */
+} nvtxSemanticsHeader_t;
+/**
+ * \brief Entry in a schema.
+ *
+ * A payload schema consists of an array of payload schema entries. It is
+ * registered with @ref nvtxPayloadSchemaRegister. `flags` can be set to `0` for
+ * simple values, `type` is the only "required" field. If not set explicitly,
+ * all other fields are zero-initialized, which means that the entry has no name
+ * and the offset is determined based on self-alignment rules.
+ *
+ * Example schema:
+ *  nvtxPayloadSchemaEntry_t schema[] = {
+ *      {0, NVTX_PAYLOAD_ENTRY_TYPE_UINT8, "one byte"},
+ *      {0, NVTX_PAYLOAD_ENTRY_TYPE_INT32, "four bytes"}
+ *  };
+ */
+typedef struct nvtxPayloadSchemaEntry_v1
+{
+    /**
+     * \brief Flags to augment the basic type.
+     *
+     * This field allows additional properties of the payload entry to be
+     * specified. Valid values are `NVTX_PAYLOAD_ENTRY_FLAG_*`.
+     */
+    uint64_t       flags;
+    /**
+     * \brief Predefined payload schema entry type or custom schema ID.
+     *
+     * Predefined types are `NVTX_PAYLOAD_ENTRY_TYPE_*`. Passing a schema ID
+     * enables nesting of schemas.
+     */
+    uint64_t       type;
+    /**
+     * \brief Name or label of the payload entry. (Optional)
+     *
+     * A meaningful name or label can help to organize and interpret the data.
+     */
+    const char*    name;
+    /**
+     * \brief Description of the payload entry. (Optional)
+     *
+     * A more detailed description of the data that is stored with this entry.
+     */
+    const char*    description;
+    /**
+     * \brief String length, array length or member selector for union types.
+     *
+     * If @ref type is a C string type, this field specifies the string length.
+     *
+     * If @ref flags specify that the entry is an array, this field specifies
+     * the array length. See `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_*` for more details.
+     *
+     * If @ref type is a union with schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION
+     * (external selection of the union member), this field contains the index
+     * (starting with 0) to an entry of integral type in the same schema. The
+     * associated field value specifies the selected union member.
+     *
+     * @note An array of schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION is not
+     * supported. @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR can
+     * be used instead.
+     */
+    uint64_t       arrayOrUnionDetail;
+    /**
+     * \brief Offset in the binary payload data (in bytes).
+     *
+     * This field specifies the byte offset from the base address of the actual
+     * binary data (blob) to the start address of the data of this entry.
+     *
+     * It is recommended (but not required) to provide the offset. Otherwise,
+     * the NVTX handler will determine the offset from natural alignment rules.
+     * In some cases, e.g. dynamic schema layouts, the offset cannot be set and
+     * has to be determined based on the data of prior entries.
+     *
+     * Setting the offset can also be used to skip entries during payload parsing.
+     */
+    uint64_t       offset;
+    /**
+     * \brief Additional semantics of the payload entry.
+     *
+     * The field points to the first element in a linked list, which enables
+     * multiple semantic extensions. Each element starts with a
+     * @ref nvtxSemanticsHeader_t.
+     */
+    const nvtxSemanticsHeader_t* semantics;
+    /**
+     * \brief Reserved for future use. Do not use it!
+     */
+    const void*    reserved;
+} nvtxPayloadSchemaEntry_t;
+/**
+ * \brief NVTX payload schema attributes.
+ *
+ * A pointer to this struct is passed to @ref nvtxPayloadSchemaRegister.
+ */
+typedef struct nvtxPayloadSchemaAttr_v1
+{
+    /**
+     * \brief Mask of valid fields in this struct.
+     *
+     * Use the `NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_*` defines.
+     */
+    uint64_t                        fieldMask;
+    /**
+     * \brief Name of the payload schema. (Optional)
+     */
+    const char*                     name;
+    /**
+     * \brief Payload schema type. (Mandatory) \anchor PAYLOAD_TYPE_FIELD
+     *
+     * Use the `NVTX_PAYLOAD_SCHEMA_TYPE_*` defines.
+     */
+    uint64_t                        type;
+    /**
+     * \brief Payload schema flags. (Optional)
+     *
+     * Flags defined by `NVTX_PAYLOAD_SCHEMA_FLAG_*` can be used to set
+     * additional properties of the schema.
+     */
+    uint64_t                        flags;
+    /**
+     * \brief Entries of a payload schema. (Mandatory) \anchor ENTRIES_FIELD
+     *
+     * This field is a pointer to an array of schema entries, each describing a
+     * field in a data structure, e.g. in a C struct or union.
+     */
+    const nvtxPayloadSchemaEntry_t* entries;
+    /**
+     * \brief Number of entries in the payload schema. (Mandatory)
+     *
+     * Number of entries in the array of payload entries \ref ENTRIES_FIELD.
+     */
+    size_t                          numEntries;
+    /**
+     * \brief The binary payload size in bytes for static payload schemas.
+     *
+     * If \ref PAYLOAD_TYPE_FIELD is @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC this
+     * value is ignored. If this field is not specified for a schema of type
+     * @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC, the size can be automatically
+     * determined by a tool.
+     */
+    size_t                          payloadStaticSize;
+    /**
+     * \brief The byte alignment for packed structures.
+     *
+     * If not specified, this field defaults to `0`, which means that the fields
+     * in the data structure are not packed and natural alignment rules can be
+     * applied.
+     */
+    size_t                          packAlign;
+    /**
+     * \brief Static payload schema ID.
+     *
+     * A static payload schema ID must be unique within the domain,
+     * >= NVTX_PAYLOAD_SCHEMA_ID_STATIC_START and
+     * < NVTX_PAYLOAD_SCHEMA_ID_DYNAMIC_START.
+     */
+    uint64_t                        schemaId;
+    /**
+     * Flexible extension for schema attributes.
+     * (Do not use. Reserved for future use.)
+     */
+    void*                           extension;
+} nvtxPayloadSchemaAttr_t;
+/**
+ * \brief This type is used to describe one entry of an enumeration.
+ *
+ * Since the value of an enum entry might not be meaningful for the analysis
+ * and/or visualization, a tool can show the name of the enum entry instead.
+ *
+ * An array of this struct is passed to @ref nvtxPayloadEnumAttr_t::entries to be
+ * finally registered via @ref nvtxPayloadEnumRegister with the NVTX handler.
+ *
+ * @note EXPERIMENTAL
+ */
+typedef struct nvtxPayloadEnum_v1
+{
+    /**
+     * Name of the enum value.
+     */
+    const char* name;
+    /**
+     * Value of the enum entry.
+     */
+    uint64_t    value;
+    /**
+     * Indicates that this entry sets a specific set of bits, which can be used
+     * to define bitsets.
+     */
+    int8_t      isFlag;
+} nvtxPayloadEnum_t;
+/**
+ * \brief NVTX payload enumeration type attributes.
+ *
+ * A pointer to this struct is passed to @ref nvtxPayloadEnumRegister.
+ */
+typedef struct nvtxPayloadEnumAttr_v1
+{
+    /**
+     * Mask of valid fields in this struct. See `NVTX_PAYLOAD_ENUM_ATTR_FIELD_*`.
+     */
+    uint64_t                 fieldMask;
+    /**
+     * Name of the enum. (Optional)
+     */
+    const char*              name;
+    /**
+     * Entries of the enum. (Mandatory)
+     * Pointer to an array of @ref nvtxPayloadEnum_t.
+     */
+    const nvtxPayloadEnum_t* entries;
+    /**
+     * Number of entries in the enum. (Mandatory)
+     */
+    size_t                   numEntries;
+    /**
+     * Size of the enumeration type in bytes.
+     */
+    size_t                   sizeOfEnum;
+    /**
+     * Static schema ID of the enum. A static payload schema ID must be unique
+     * within the domain,
+     * >= NVTX_PAYLOAD_SCHEMA_ID_STATIC_START and
+     * < NVTX_PAYLOAD_SCHEMA_ID_DYNAMIC_START.
+     */
+    uint64_t                 schemaId;
+    /**
+     * Flexible extension for enumeration attributes.
+     * (Do not use. Reserved for future use.)
+     */
+    void*                    extension;
+} nvtxPayloadEnumAttr_t;
+typedef struct nvtxScopeAttr_v1
+{ /* Attributes of an NVTX scope; a pointer to this struct is passed to nvtxScopeRegister. */
+    size_t      structSize; /* presumably sizeof(nvtxScopeAttr_t), set by the caller; TODO confirm */
+    /**
+     * Path delimited by '/' characters, relative to @ref parentScope. Leading
+     * slashes are ignored. Nodes in the path may use name[key] syntax to
+     * indicate an array of sibling nodes, which may be combined with other
+     * non-array nodes or different arrays at the same scope. Node names should
+     * be UTF8 printable characters. '\' has to be used to escape '/', '[', and
+     * ']' characters in node names. An empty C string "" and `NULL` are valid
+     * inputs and treated equivalently.
+     *
+     * A GPU can be specified using its:
+     * - Unique identifier (UUID) with "GPU[UUID:#]",
+     * - CUDA device ID (sensitive to CUDA_VISIBLE_DEVICES) with "GPU[CUDAID:#]",
+     * - NVML (nvidia-smi) device ID with "GPU[NVSMI:#]"
+     *
+     * (replace `#` with the actual device ID).
+     * For display purposes, it is recommended that a tool shows a pretty name.
+     * To clearly identify a GPU, the @ref parentScope should also match
+     * the GPU's execution context.
+     */
+    const char* path;
+    /** Identifier of the parent scope, to which `path` is appended. */
+    uint64_t    parentScope;
+    /**
+     * Static scope ID. Must be unique within the domain,
+     * >= NVTX_SCOPE_ID_STATIC_START, and < NVTX_SCOPE_ID_DYNAMIC_START.
+     * Use NVTX_SCOPE_NONE to let the tool create a (dynamic) scope ID.
+     */
+    uint64_t    scopeId;
+} nvtxScopeAttr_t;
+#endif /* NVTX_PAYLOAD_TYPEDEFS_V1 */
+#ifndef NVTX_PAYLOAD_TYPEDEFS_DEFERRED_V1
+#define NVTX_PAYLOAD_TYPEDEFS_DEFERRED_V1
+/**
+ * Attributes of an NVTX time domain. A pointer to this struct is passed to
+ * `nvtxTimeDomainRegister`.
+ */
+typedef struct nvtxTimeDomainAttr_v1
+{
+    /** Identifier of the NVTX scope the time domain is associated with. */
+    uint64_t scopeId;
+    /** Predefined `NVTX_TIMESTAMP_TYPE_*`. */
+    uint64_t timestampTypeId;
+    /**
+     * Static (feed-forward) time domain ID. `0` makes the tool generate the ID.
+     * A static time domain ID must be >= NVTX_TIME_DOMAIN_ID_STATIC_START and
+     * < NVTX_TIME_DOMAIN_ID_DYNAMIC_START.
+     */
+    uint64_t timeDomainId;
+    /** Properties of the timer (use NVTX_TIMER_FLAG_*). */
+    uint64_t timerFlags;
+    /** Ticks per second (0 means unknown). */
+    int64_t  timerResolution;
+    /** Point in time when the timer starts (use NVTX_TIMER_START_*). */
+    uint64_t timerStart;
+} nvtxTimeDomainAttr_t;
+/**
+ * Synchronization point between two time domains. An array of this struct is
+ * passed to `nvtxTimeSyncPointTable`.
+ */
+typedef struct nvtxSyncPoint_v1
+{
+    int64_t src; /* presumably the timestamp in the source time domain; TODO confirm */
+    int64_t dst; /* presumably the timestamp in the destination time domain; TODO confirm */
+} nvtxSyncPoint_t;
+/**
+ * \brief Helper struct to submit a batch of events (marks or ranges).
+ *
+ * By default, events are assumed to be chronologically sorted by the first
+ * timestamp in the event (start time in a range). If the events are not sorted,
+ * the `flags` field must be set accordingly (see `NVTX_BATCH_FLAG_*`).
+ *
+ * A pointer to this struct is passed to `nvtxEventBatchSubmit`.
+ */
+typedef struct nvtxEventBatch_v1
+{
+    /**
+     * Identifier of the data layout of a deferred event in the array of events.
+     * Only layouts with static payload size are allowed. The size of an event
+     * in the array is specified by the static payload size during the schema
+     * registration. The time domain of event timestamps is provided via time
+     * semantics in the schema registration.
+     */
+    uint64_t    eventSchemaId;
+    /** Size of the array of deferred events (in bytes). */
+    size_t      size;
+    /** Pointer to the array of deferred events. */
+    const void* events;
+    /** Scope of all events or counters in the batch. */
+    uint64_t    scope;
+    /** Timestamp ordering (sorted, partially sorted, unsorted), etc. */
+    uint64_t    flags;
+    /** Flexible data which can be referenced by events in the batch. */
+    const void* flexData;
+    /** Size of the flexible data memory blob. */
+    size_t      flexDataSize;
+    /**
+     * Offset from the `flexData` pointer to the beginning of the flexible data
+     * in bytes.
+     */
+    size_t      flexDataOffset;
+} nvtxEventBatch_t;
+#endif /* NVTX_PAYLOAD_TYPEDEFS_DEFERRED_V1 */
+#ifndef NVTX_PAYLOAD_API_FUNCTIONS_V1
+#define NVTX_PAYLOAD_API_FUNCTIONS_V1
+/**
+ * \brief Register a payload schema.
+ *
+ * @param domain NVTX domain handle.
+ * @param attr NVTX payload schema attributes.
+ *
+ * @return NOTE(review): presumably the ID of the registered schema (compare
+ * `nvtxPayloadSchemaAttr_t::schemaId`); confirm against the implementation.
+ */
+NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadSchemaRegister(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadSchemaAttr_t* attr);
+/**
+ * \brief Register an enumeration type with the payload extension.
+ *
+ * @param domain NVTX domain handle.
+ * @param attr NVTX payload enumeration type attributes.
+ *
+ * @return NOTE(review): presumably the schema ID of the registered enum
+ * (compare `nvtxPayloadEnumAttr_t::schemaId`); confirm against the
+ * implementation.
+ */
+NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadEnumRegister(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadEnumAttr_t* attr);
+/**
+ * \brief Register a scope.
+ *
+ * @param domain NVTX domain handle.
+ * @param attr Scope attributes (path, parent scope and optional static ID).
+ *
+ * @return An identifier for the scope. If the operation was not successful,
+ * `NVTX_SCOPE_NONE` is returned.
+ */
+NVTX_DECLSPEC uint64_t NVTX_API nvtxScopeRegister(
+    nvtxDomainHandle_t domain,
+    const nvtxScopeAttr_t* attr);
+/**
+ * \brief Marks an instantaneous event in the application with the attributes
+ * being passed via the extended payload.
+ *
+ * An NVTX handler can assume that the payload contains the event message.
+ * Otherwise, it might ignore the event.
+ *
+ * @param domain NVTX domain handle.
+ * @param payloadData Pointer to an array of structured payloads.
+ * @param count Number of payloads (BLOBs) in the array.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxMarkPayload(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadData_t* payloadData,
+    size_t count);
+/**
+ * \brief Begin a nested thread range with the attributes being passed via the
+ * payload.
+ *
+ * @param domain NVTX domain handle.
+ * @param payloadData Pointer to an array of extended payloads.
+ * @param count Number of payloads.
+ *
+ * @return The level of the range being started on the current thread. If an
+ * error occurs, a negative value is returned.
+ */
+NVTX_DECLSPEC int NVTX_API nvtxRangePushPayload(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadData_t* payloadData,
+    size_t count);
+/**
+ * \brief End a nested thread range with an additional custom payload.
+ *
+ * NVTX event attributes passed to this function (via the payloads) overwrite
+ * event attributes (message and color) that have been set in the push event.
+ * Other payload entries extend the data of the range.
+ *
+ * @param domain NVTX domain handle.
+ * @param payloadData Pointer to an array of structured payloads.
+ * @param count Number of payloads (BLOBs) in the array.
+ *
+ * @return The level of the range being ended on the current thread. If an
+ * error occurs, a negative value is returned.
+ */
+NVTX_DECLSPEC int NVTX_API nvtxRangePopPayload(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadData_t* payloadData,
+    size_t count);
+/**
+ * \brief Start a thread range with attributes passed via the extended payload.
+ *
+ * @param domain NVTX domain handle.
+ * @param payloadData Pointer to an array of structured payloads.
+ * @param count Number of payloads (BLOBs) in the array.
+ *
+ * @return The correlation ID of the started range. It has to be passed to
+ * @ref nvtxRangeEndPayload to end the range.
+ */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartPayload(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadData_t* payloadData,
+    size_t count);
+/**
+ * \brief End a thread range and pass a custom payload.
+ *
+ * NVTX event attributes passed to this function (via the payloads) overwrite
+ * event attributes (message and color) that have been set in the start event.
+ * Other payload entries extend the data of the range.
+ *
+ * @param domain NVTX domain handle.
+ * @param id The correlation ID returned from an NVTX range start call.
+ * @param payloadData Pointer to an array of structured payloads.
+ * @param count Number of payloads (BLOBs) in the array.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxRangeEndPayload(
+    nvtxDomainHandle_t domain,
+    nvtxRangeId_t id,
+    const nvtxPayloadData_t* payloadData,
+    size_t count);
+/**
+ * \brief Checks if the given NVTX domain is enabled.
+ *
+ * This function can be used to guard expensive code instrumentation.
+ * In general, it is recommended to avoid different execution branches based on
+ * NVTX instrumentation.
+ *
+ * If no tool is attached, this function will always return `0`.
+ * If a tool is attached, but does not handle this function, `1` is returned.
+ * If a tool is attached and handles this function, the return value is
+ * determined by the tool. Positive (>0) return values indicate that the domain
+ * is enabled, `0` indicates that the domain is disabled.
+ *
+ * @param domain NVTX domain handle.
+ * @return 0 if the domain is disabled. Values > 0 indicate an enabled domain.
+ */
+NVTX_DECLSPEC uint8_t NVTX_API nvtxDomainIsEnabled(
+    nvtxDomainHandle_t domain);
+#endif /* NVTX_PAYLOAD_API_FUNCTIONS_V1 */
+#ifndef NVTX_PAYLOAD_API_FUNCTIONS_DEFERRED_V1
+#define NVTX_PAYLOAD_API_FUNCTIONS_DEFERRED_V1
+/**
+ * Get a timestamp from the NVTX handler or tool. If no tool is attached, the
+ * CPU TSC might be returned. No guarantees are made.
+ * The returned timestamp is just meant to be used in deferred events/counters.
+ *
+ * @return The timestamp (see `NVTX_TIMESTAMP_TYPE_TOOL_PROVIDED`).
+ */
+NVTX_DECLSPEC int64_t NVTX_API nvtxTimestampGet(void);
+/**
+ * Register a time domain. Associates an NVTX scope with the time domain.
+ * Timestamps of NVTX events or counters in the scope are interpreted according
+ * to the time domain definitions.
+ *
+ * @param domain NVTX domain handle.
+ * @param timeAttr Time domain attributes (timestamp type, scope, flags, etc.).
+ * @return The time domain ID, or `NVTX_TIME_DOMAIN_ID_NONE` if the time domain
+ *         registration failed.
+ */
+NVTX_DECLSPEC uint64_t NVTX_API nvtxTimeDomainRegister(
+    nvtxDomainHandle_t domain,
+    const nvtxTimeDomainAttr_t* timeAttr);
+/**
+ * Provide the pointer to a function that returns a timestamp.
+ * This enables the tool to create time synchronization points.
+ *
+ * @param domain NVTX domain handle.
+ * @param timeDomainId Time domain identifier or timestamp type ID, if it is
+ *                     unambiguous.
+ * @param flags Indicates if it is safe to call the timestamp provider after
+ *              process teardown (see `NVTX_TIMER_SOURCE_*`).
+ * @param timestampProviderFn Pointer to a function that returns a timestamp.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxTimerSource(
+    nvtxDomainHandle_t domain,
+    uint64_t timeDomainId,
+    uint64_t flags,
+    int64_t (*timestampProviderFn)(void));
+/**
+ * Same as `nvtxTimerSource`, but with an additional data pointer argument.
+ *
+ * @param domain NVTX domain handle.
+ * @param timeDomainId Time domain identifier or timestamp type ID, if it is
+ *                     unambiguous.
+ * @param flags Indicates if it is safe to call the timestamp provider after
+ *              process teardown (see `NVTX_TIMER_SOURCE_*`).
+ * @param timestampProviderFn Pointer to a function that takes the data pointer
+ *                            and returns a timestamp.
+ * @param data Pointer to data that is passed to the timestamp provider function.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxTimerSourceWithData(
+    nvtxDomainHandle_t domain,
+    uint64_t timeDomainId,
+    uint64_t flags,
+    int64_t (*timestampProviderFn)(void* data),
+    void* data);
+/**
+ * Provides a synchronization point between two time domains.
+ * Two synchronization points are required to enable a timestamp conversion.
+ * The tool must know one of the time domains or at least must be able to chain
+ * conversions to enable the conversion between the given timestamps.
+ *
+ * @param domain NVTX domain handle.
+ * @param timeDomainId1 Time domain 1 ID or timestamp type ID, if it is
+ *                      unambiguous.
+ * @param timeDomainId2 Time domain 2 ID or timestamp type ID, if it is
+ *                      unambiguous.
+ * @param timestamp1 Timestamp in the first time domain.
+ * @param timestamp2 Timestamp in the second time domain.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxTimeSyncPoint(
+    nvtxDomainHandle_t domain,
+    uint64_t timeDomainId1,
+    uint64_t timeDomainId2,
+    int64_t timestamp1,
+    int64_t timestamp2);
+/**
+ * The same as `nvtxTimeSyncPoint` but with multiple synchronization points.
+ *
+ * @param domain NVTX domain handle.
+ * @param timeDomainIdSrc Source time domain ID or timestamp type ID, if it is
+ *                        unambiguous.
+ * @param timeDomainIdDst Destination time domain ID or timestamp type ID, if it
+ *                        is unambiguous.
+ * @param syncPoints Pointer to an array of synchronization points
+ *                   (`nvtxSyncPoint_t`).
+ * @param count Number of synchronization points in the array.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxTimeSyncPointTable(
+    nvtxDomainHandle_t domain,
+    uint64_t timeDomainIdSrc,
+    uint64_t timeDomainIdDst,
+    const nvtxSyncPoint_t* syncPoints,
+    size_t count);
+/**
+ * @brief Pass a conversion factor between two time domains to the NVTX handler.
+ *
+ * NOTE(review): `timestampSrc` and `timestampDst` presumably form one reference
+ * pair that anchors the linear conversion described by `slope`; confirm against
+ * the NVTX handler before relying on this.
+ *
+ * @param domain NVTX domain handle.
+ * @param timeDomainIdSrc source time domain ID or timestamp type ID, if it is
+ *                        unambiguous.
+ * @param timeDomainIdDst destination time domain ID or timestamp type ID, if it
+ *                        is unambiguous.
+ * @param slope Conversion factor between the two time domains.
+ * @param timestampSrc Timestamp in the source time domain.
+ * @param timestampDst Timestamp in the destination time domain.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxTimestampConversionFactor(
+    nvtxDomainHandle_t domain,
+    uint64_t timeDomainIdSrc,
+    uint64_t timeDomainIdDst,
+    double slope,
+    int64_t timestampSrc,
+    int64_t timestampDst);
+/**
+ * @brief Submit one deferred event.
+ *
+ * @param domain NVTX domain handle.
+ * @param payloadData Pointer to an array of structured payloads
+ *                    (`nvtxPayloadData_t`).
+ * @param numPayloads Number of payloads of the event, i.e. the number of
+ *                    entries in `payloadData`.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxEventSubmit(
+    nvtxDomainHandle_t domain,
+    const nvtxPayloadData_t* payloadData,
+    size_t numPayloads);
+/**
+ * \brief Submit a batch of deferred events in the given domain.
+ *
+ * @param domain NVTX domain handle.
+ * @param eventBatch Pointer to the `nvtxEventBatch_t` that holds the deferred
+ *                   events batch details.
+ */
+NVTX_DECLSPEC void NVTX_API nvtxEventBatchSubmit(
+    nvtxDomainHandle_t domain,
+    const nvtxEventBatch_t* eventBatch);
+#endif /* NVTX_PAYLOAD_API_FUNCTIONS_DEFERRED_V1 */
+/**
+ * \brief Callback IDs of API functions in the payload extension.
+ *
+ * The NVTX handler can use these values to register a handler function. When
+ * `InitializeInjectionNvtxExtension(nvtxExtModuleInfo_t* moduleInfo)` is
+ * executed, a handler routine can be registered as follows:
+ * \code{.c}
+ *      moduleInfo->segments->slots[NVTX3EXT_CBID_nvtxPayloadSchemaRegister] =
+ *          (intptr_t)PayloadSchemaRegisterHandlerFn;
+ * \endcode
+ *
+ * The IDs are split into two guarded groups that share one number space:
+ * the base group (`NVTX_PAYLOAD_CALLBACK_ID_V1`) uses 0-7 and 12, the
+ * deferred-event group (`NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1`) uses 8-11 and
+ * 13-17. The numbering within each group is therefore not contiguous.
+ */
+#ifndef NVTX_PAYLOAD_CALLBACK_ID_V1
+#define NVTX_PAYLOAD_CALLBACK_ID_V1
+#define NVTX3EXT_CBID_nvtxPayloadSchemaRegister      0
+#define NVTX3EXT_CBID_nvtxPayloadEnumRegister        1
+#define NVTX3EXT_CBID_nvtxMarkPayload                2
+#define NVTX3EXT_CBID_nvtxRangePushPayload           3
+#define NVTX3EXT_CBID_nvtxRangePopPayload            4
+#define NVTX3EXT_CBID_nvtxRangeStartPayload          5
+#define NVTX3EXT_CBID_nvtxRangeEndPayload            6
+#define NVTX3EXT_CBID_nvtxDomainIsEnabled            7
+#define NVTX3EXT_CBID_nvtxScopeRegister             12
+#endif /* NVTX_PAYLOAD_CALLBACK_ID_V1 */
+#ifndef NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1
+#define NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1
+#define NVTX3EXT_CBID_nvtxTimestampGet               8
+#define NVTX3EXT_CBID_nvtxTimeDomainRegister         9
+#define NVTX3EXT_CBID_nvtxTimerSource               10
+#define NVTX3EXT_CBID_nvtxTimerSourceWithData       11
+#define NVTX3EXT_CBID_nvtxTimeSyncPoint             13
+#define NVTX3EXT_CBID_nvtxTimeSyncPointTable        14
+#define NVTX3EXT_CBID_nvtxTimestampConversionFactor 15
+#define NVTX3EXT_CBID_nvtxEventSubmit               16
+#define NVTX3EXT_CBID_nvtxEventBatchSubmit          17
+#endif /* NVTX_PAYLOAD_CALLBACK_ID_DEFERRED_V1 */
+/*** Helper utilities ***/
+/**
+ * \brief  Helper macro for safe double-cast of pointer to uint64_t value.
+ *
+ * The pointer is first converted to `uintptr_t` and the result is then
+ * converted to `uint64_t`. The C++ variant uses `reinterpret_cast` for the
+ * pointer-to-integer step; the C variant uses `NVTX_STATIC_CAST` for both
+ * steps. The whole expansion is parenthesized so it can be used inside larger
+ * expressions.
+ *
+ * @param p Pointer to convert.
+ */
+#ifndef NVTX_POINTER_AS_PAYLOAD_ULLVALUE
+# ifdef __cplusplus
+# define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) \
+    static_cast<uint64_t>(reinterpret_cast<uintptr_t>(p))
+# else
+#define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) (NVTX_STATIC_CAST(uint64_t, NVTX_STATIC_CAST(uintptr_t, p)))
+# endif
+#endif
+#ifndef NVTX_PAYLOAD_EVTATTR_SET_DATA
+/**
+ * \brief Helper macro to attach a single payload to an NVTX event attribute.
+ *
+ * Fills `*pldata_addr` with the schema ID, size and payload address, stores the
+ * address of `*pldata_addr` in the event attribute's `payload.ullValue`, sets
+ * `payloadType` to `NVTX_PAYLOAD_TYPE_EXT` and sets `reserved0` to 1.
+ *
+ * @note The macro expands to several statements that are NOT wrapped in
+ *       `do { } while (0)`. Do not use it as the unbraced body of an `if`,
+ *       `else` or loop; only the first assignment would be guarded.
+ *
+ * @param evtAttr NVTX event attribute (variable name, accessed with `.`,
+ *                not a pointer)
+ * @param pldata_addr Address of `nvtxPayloadData_t` variable.
+ * @param schema_id NVTX binary payload schema ID.
+ * @param pl_addr Address of the (actual) payload.
+ * @param sz size of the (actual) payload.
+ */
+#define NVTX_PAYLOAD_EVTATTR_SET_DATA(evtAttr, pldata_addr, schema_id, pl_addr, sz) \
+    (pldata_addr)->schemaId = schema_id; \
+    (pldata_addr)->size = sz; \
+    (pldata_addr)->payload = pl_addr; \
+    (evtAttr).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(pldata_addr); \
+    (evtAttr).payloadType = NVTX_PAYLOAD_TYPE_EXT; \
+    (evtAttr).reserved0 = 1;
+#endif /* NVTX_PAYLOAD_EVTATTR_SET_DATA */
+#ifndef NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE
+/**
+ * \brief Helper macro to attach multiple payloads to an NVTX event attribute.
+ *
+ * Sets `payloadType` to `NVTX_PAYLOAD_TYPE_EXT`, stores the number of array
+ * elements in `reserved0` and the array address in `payload.ullValue`.
+ *
+ * @note The element count is computed with `sizeof(pldata)`, so `pldata` must
+ *       be an actual array object. Passing a pointer (e.g. an array that was
+ *       received as a function parameter) yields a wrong count.
+ * @note The macro expands to several statements that are NOT wrapped in
+ *       `do { } while (0)`; do not use it as an unbraced `if`/loop body.
+ *
+ * @param evtAttr NVTX event attribute (variable name)
+ * @param pldata Payload data array (of type `nvtxPayloadData_t`)
+ */
+#define NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE(evtAttr, pldata) \
+    (evtAttr).payloadType = NVTX_PAYLOAD_TYPE_EXT; \
+    (evtAttr).reserved0 = sizeof(pldata)/sizeof(nvtxPayloadData_t); \
+    (evtAttr).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(pldata);
+#endif /* NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE */
+#ifndef NVTX_PAYLOAD_EVTATTR_SET
+/*
+ * Do not use this macro directly! It is a helper to attach a single payload to
+ * an NVTX event attribute.
+ *
+ * Unlike `NVTX_PAYLOAD_EVTATTR_SET_DATA`, `evtAttr` is a pointer here. The
+ * macro declares a one-element `nvtxPayloadData_t` array
+ * (`_NVTX_PAYLOAD_DATA_VAR`) in the enclosing scope and stores its address in
+ * the event attribute, so it expands to a declaration followed by statements
+ * and already ends with a semicolon.
+ * @warning The NVTX push, start or mark operation must not be in an outer scope.
+ */
+#define NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schema_id, pl_addr, sz) \
+    nvtxPayloadData_t _NVTX_PAYLOAD_DATA_VAR[] = \
+        {{schema_id, sz, pl_addr}}; \
+    (evtAttr)->payload.ullValue = \
+        NVTX_POINTER_AS_PAYLOAD_ULLVALUE(_NVTX_PAYLOAD_DATA_VAR); \
+    (evtAttr)->payloadType = NVTX_PAYLOAD_TYPE_EXT; \
+    (evtAttr)->reserved0 = 1;
+#endif /* NVTX_PAYLOAD_EVTATTR_SET */
+#ifndef nvtxPayloadRangePush
+/**
+ * \brief Helper macro to push a range with extended payload.
+ *
+ * Overwrites the payload fields of `*evtAttr` via `NVTX_PAYLOAD_EVTATTR_SET`
+ * and then calls `nvtxDomainRangePushEx(domain, evtAttr)`. Both steps run
+ * inside one `do { } while (0)` block, so the temporary payload descriptor
+ * created by the helper is still in scope when the push is issued.
+ *
+ * @param domain NVTX domain handle
+ * @param evtAttr pointer to NVTX event attribute.
+ * @param schemaId NVTX payload schema ID
+ * @param plAddr Pointer to the binary data (actual payload)
+ * @param size Size of the binary payload data in bytes.
+ */
+#define nvtxPayloadRangePush(domain, evtAttr, schemaId, plAddr, size) \
+do { \
+    NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) \
+    nvtxDomainRangePushEx(domain, evtAttr); \
+} while (0)
+#endif /* nvtxPayloadRangePush */
+#ifndef nvtxPayloadMark
+/**
+ * \brief Helper macro to set a marker with extended payload.
+ *
+ * Overwrites the payload fields of `*evtAttr` via `NVTX_PAYLOAD_EVTATTR_SET`
+ * and then calls `nvtxDomainMarkEx(domain, evtAttr)`. Both steps run inside
+ * one `do { } while (0)` block, so the temporary payload descriptor created by
+ * the helper is still in scope when the marker is issued.
+ *
+ * @param domain NVTX domain handle
+ * @param evtAttr pointer to NVTX event attribute.
+ * @param schemaId NVTX payload schema ID
+ * @param plAddr Pointer to the binary data (actual payload)
+ * @param size Size of the binary payload data in bytes.
+ */
+#define nvtxPayloadMark(domain, evtAttr, schemaId, plAddr, size) \
+do { \
+    NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) \
+    nvtxDomainMarkEx(domain, evtAttr); \
+} while (0)
+#endif /* nvtxPayloadMark */
+/*
+ * Macros to create versioned symbols.
+ *
+ * NVTX_EXT_PAYLOAD_VERSIONED_ID(NAME) yields NAME_v<VERSION>_bpl<COMPATID>,
+ * where VERSION and COMPATID are the expansions of NVTX_VERSION and
+ * NVTX_EXT_PAYLOAD_COMPATID. The _L2 level exists only so that these two
+ * macros are expanded before _L3 pastes the tokens together with `##`.
+ */
+#ifndef NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIERS_V1
+#define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIERS_V1
+#define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \
+    NAME##_v##VERSION##_bpl##COMPATID
+#define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \
+    NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID)
+#define NVTX_EXT_PAYLOAD_VERSIONED_ID(NAME) \
+    NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_PAYLOAD_COMPATID)
+#endif /* NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIERS_V1 */
+#ifdef __GNUC__
+#pragma GCC visibility push(internal)
+#endif
+/* Extension types are required for the implementation and the NVTX handler. */
+#define NVTX_EXT_TYPES_GUARD
+#include "nvtxDetail/nvtxExtTypes.h"
+#undef NVTX_EXT_TYPES_GUARD
+#ifndef NVTX_NO_IMPL
+#define NVTX_EXT_IMPL_PAYLOAD_GUARD
+#include "nvtxDetail/nvtxExtImplPayload_v1.h"
+#undef NVTX_EXT_IMPL_PAYLOAD_GUARD
+#endif /* NVTX_NO_IMPL */
+#ifdef __GNUC__
+#pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtPayloadHelper.h ADDED Viewed

	@@ -0,0 +1,192 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvtxDetail/nvtxExtPayloadHelperInternal.h"
+/* Empty marker that expands to nothing. It only labels the parenthesized list
+ * of payload entries for readability and can be omitted. */
+/* TODO: Fix issue with trailing comma at end of entry list. */
+#define NVTX_PAYLOAD_ENTRIES
+/**
+ * Use this macro for payload entries that are defined by a schema (nested
+ * payload schema). It forwards `schemaId` to the internal helper
+ * `_NVTX_PAYLOAD_NESTED`.
+ */
+#define NVTX_PAYLOAD_NESTED(schemaId) _NVTX_PAYLOAD_NESTED(schemaId)
+/**
+ * \brief Define a payload schema for an existing C `struct` definition.
+ *
+ *  This macro does
+ *   1) create schema description (array of schema entries).
+ *   2) set the schema attributes for a static data layout.
+ *
+ * It can be used in static code or within a function context.
+ *
+ * Example:
+ *  NVTX_DEFINE_SCHEMA_FOR_STRUCT(your_struct, "SchemaName",
+ *      NVTX_PAYLOAD_ENTRIES(
+ *          (index, TYPE_INT, "integer value"),
+ *          (dpfloat, TYPE_DOUBLE, "fp64 value"),
+ *          (text, TYPE_CSTRING, "text", NULL, 24)
+ *      )
+ *  )
+ *
+ * It is required to at least provide the struct name and the payload entries.
+ * The first two fields (member name and NVTX entry type) of each payload entry
+ * are required.
+ *
+ * The optional parameters are positional: they are only allowed to be passed
+ * in the predefined order listed below, so a later optional parameter requires
+ * all earlier optional parameters to be given as well.
+ * The payload entries are always the last parameter. A maximum of 16 schema
+ * entries is supported.
+ *
+ * It is recommended to use `NVTX_PAYLOAD_SCHEMA_REGISTER` to register the schema.
+ *
+ * @param struct_id The name of the struct.
+ * @param schema_name (Optional 1) name of the payload schema. Default is `NULL`.
+ * @param prefix (Optional 2) prefix before the schema and attributes variables,
+ *               e.g. `static const`. Leave this empty, if no prefix is desired.
+ * @param schema_flags (Optional 3) flags to augment the payload schema.
+ *                     Default is `NVTX_PAYLOAD_SCHEMA_FLAG_NONE`.
+ * @param schema_id (Optional 4) User-defined payload schema ID.
+ * @param entries (Mandatory) Payload schema entries. This is always the last
+ *                parameter to the macro.
+ */
+#define NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, ...) \
+    _NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, __VA_ARGS__)
+/**
+ * \brief Define a C struct together with a matching schema.
+ *
+ * This macro does
+ *   1) define the payload type (typedef struct).
+ *   2) create schema description (array of schema entries).
+ *   3) set the schema attributes for a static data layout.
+ *
+ * The macro can be used in static code or within a function context.
+ *
+ * It defines the schema attributes in `struct_id##Attr`. Thus, it is recommended
+ * to use `NVTX_PAYLOAD_SCHEMA_REGISTER(domain, struct_id)` to register the schema.
+ *
+ * Example:
+ *  NVTX_DEFINE_STRUCT_WITH_SCHEMA(your_struct_name, "Your schema name",
+ *      NVTX_PAYLOAD_ENTRIES(
+ *          (int, index, TYPE_INT, "integer value"),
+ *          (double, dpfloat, TYPE_DOUBLE, "fp64 value"),
+ *          (const char, (text, 24), TYPE_CSTRING, "text", NULL, 24)
+ *      )
+ *  )
+ *
+ * The first three fields (C type, member, entry type) of each entry are required.
+ * A fixed-size array or string requires a special notation: the member name
+ * and the size are separated by a comma and enclosed in parentheses (see the
+ * last entry in the example).
+ *
+ * The optional parameters are positional (only allowed to be passed in the
+ * predefined order). A maximum of 16 schema entries is supported.
+ *
+ * @param struct_id The name of the struct.
+ * @param schema_name (Optional 1) name of the payload schema. Default is `NULL`.
+ * @param prefix (Optional 2) prefix before the schema and attributes variables,
+ *               e.g. `static const`. Leave this empty, if no prefix is desired.
+ * @param schema_flags (Optional 3) flags to augment the payload schema.
+ *                     Default is `NVTX_PAYLOAD_SCHEMA_FLAG_NONE`.
+ * @param schema_id (Optional 4) User-defined payload schema ID.
+ * @param entries (Mandatory) The schema entries. This is always the last
+ *                parameter to the macro.
+ */
+#define NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, ...) \
+    _NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, __VA_ARGS__)
+/**
+ * \brief Initialize and register the NVTX binary payload schema.
+ *
+ * This does essentially the same as `NVTX_DEFINE_STRUCT_WITH_SCHEMA`, but in
+ * addition the schema is registered. The schema ID will be defined as follows:
+ * `const uint64_t struct_id##_schemaId`.
+ *
+ * The schema ID variable is initialized by a call to
+ * `nvtxPayloadSchemaRegister`, and the expansion already ends with a
+ * semicolon. Note that in C an object with static storage duration needs a
+ * constant initializer, so in C this macro has to be used inside a function.
+ *
+ * @param domain The NVTX domain handle.
+ * All other parameters are similar to `NVTX_DEFINE_STRUCT_WITH_SCHEMA`.
+ */
+#define NVTX_DEFINE_STRUCT_WITH_SCHEMA_AND_REGISTER(domain, struct_id, ...) \
+    _NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, __VA_ARGS__) \
+    const uint64_t struct_id##_schemaId = nvtxPayloadSchemaRegister(domain, &struct_id##Attr);
+/**
+ * \brief Define payload schema for an existing `struct` and register the schema.
+ *
+ * This does essentially the same as `NVTX_DEFINE_SCHEMA_FOR_STRUCT`, but in
+ * addition, the schema is registered and `const uint64_t struct_id##_schemaId`
+ * is defined and set to the value returned by `nvtxPayloadSchemaRegister`.
+ * The expansion already ends with a semicolon.
+ *
+ * @param domain The NVTX domain handle.
+ * All other parameters are similar to `NVTX_DEFINE_SCHEMA_FOR_STRUCT`.
+ */
+#define NVTX_DEFINE_SCHEMA_FOR_STRUCT_AND_REGISTER(domain, struct_id, ...) \
+    _NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, __VA_ARGS__) \
+    const uint64_t struct_id##_schemaId = nvtxPayloadSchemaRegister(domain, &struct_id##Attr);
+/**
+ * \brief Create a type definition for the given struct ID and members.
+ *
+ * This is a convenience macro. A normal `typedef` can be used instead.
+ * Each member is given as a parenthesized `(type, declarator)` pair; the
+ * arguments are forwarded to `_NVTX_PAYLOAD_TYPEDEF_STRUCT`.
+ *
+ * Example usage:
+ *   NVTX_DEFINE_STRUCT(your_struct,
+ *           (double, fp64),
+ *           (uint8_t, u8),
+ *           (float, fp32[3])
+ *   )
+ *
+ * @param struct_id The name of the struct.
+ * @param members The members of the struct.
+ */
+#define NVTX_DEFINE_STRUCT(struct_id, ...) \
+    _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, __VA_ARGS__)
+/**
+ * \brief Register an NVTX binary payload schema.
+ *
+ * This is a convenience macro, which takes the same `struct_id` that has been
+ * used in other helper macros. Instead, `nvtxPayloadSchemaRegister` can also be
+ * used, but `&struct_id##Attr` has to be passed.
+ *
+ * The macro expands to a single call expression without a trailing semicolon,
+ * so its result can be used like a function return value, e.g.
+ * `uint64_t id = NVTX_PAYLOAD_SCHEMA_REGISTER(domain, your_struct);`, and it
+ * is safe as the body of an unbraced `if`/`else`.
+ *
+ * @param domain The NVTX domain handle.
+ * @param struct_id The name of the struct.
+ *
+ * @return NVTX schema ID
+ */
+#define NVTX_PAYLOAD_SCHEMA_REGISTER(domain, struct_id) \
+    nvtxPayloadSchemaRegister(domain, &struct_id##Attr)

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsCounters.h ADDED Viewed

	@@ -0,0 +1,132 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExtPayload.h"
+/** Identifier of the semantic extension for counters. */
+#ifndef NVTX_SEMANTIC_ID_COUNTERS_V1
+#define NVTX_SEMANTIC_ID_COUNTERS_V1 5
+/* Use with the version field of `nvtxSemanticsHeader_t`. */
+#define NVTX_COUNTER_SEMANTIC_VERSION 2
+/***  Flags to augment the counter value. ***/
+#define NVTX_COUNTER_FLAGS_NONE  0
+/**
+ * Convert the fixed point value to a normalized floating point.
+ * Use the signedness of the underlying type this flag is applied to.
+ * Unsigned [0f : 1f] or signed [-1f : 1f]
+ */
+#define NVTX_COUNTER_FLAG_NORMALIZE  (1 << 1)
+/**
+ * Tools should apply scale and limits when graphing, ideally in a "soft" way
+ * to see when limits are exceeded.
+ * `NVTX_COUNTER_FLAG_LIMITS` is the combination of the MIN and MAX flags.
+ */
+#define NVTX_COUNTER_FLAG_LIMIT_MIN  (1 << 2)
+#define NVTX_COUNTER_FLAG_LIMIT_MAX  (1 << 3)
+#define NVTX_COUNTER_FLAG_LIMITS \
+    (NVTX_COUNTER_FLAG_LIMIT_MIN | NVTX_COUNTER_FLAG_LIMIT_MAX)
+/**
+ * Counter value types
+ *
+ * These are enumerated values stored at bit offset 4, not independent bits:
+ * `DELTA_SINCE_START` (3 << 4) has the same bit pattern as
+ * `ABSOLUTE | DELTA`, so the value types must not be OR-ed together.
+ */
+#define NVTX_COUNTER_FLAG_VALUETYPE_ABSOLUTE          (1 << 4)
+/* Delta to previous sample, tool-defined if no previous sample is available. */
+#define NVTX_COUNTER_FLAG_VALUETYPE_DELTA             (2 << 4)
+#define NVTX_COUNTER_FLAG_VALUETYPE_DELTA_SINCE_START (3 << 4)
+/**
+ * Counter interpolation / effective range of counters.
+ *
+ * These are enumerated values stored at bit offset 8, not independent bits:
+ * `UNTIL_NEXT` (3 << 8) has the same bit pattern as `POINT | SINCE_LAST`, so
+ * the interpolation modes must not be OR-ed together.
+ */
+/* No interpolation between samples. */
+#define NVTX_COUNTER_FLAG_INTERPOLATION_POINT         (1 << 8)
+/* Piecewise constant interpolation between the current and the last sample. */
+#define NVTX_COUNTER_FLAG_INTERPOLATION_SINCE_LAST    (2 << 8)
+/* Piecewise constant interpolation between the current and the next sample. */
+#define NVTX_COUNTER_FLAG_INTERPOLATION_UNTIL_NEXT    (3 << 8)
+/* Piecewise linear interpolation between samples. */
+#define NVTX_COUNTER_FLAG_INTERPOLATION_LINEAR        (4 << 8)
+/**
+ * Datatype for limits union (value of `limitType`).
+ *
+ * Going by the names, I64/U64/F64 select the `i64`/`u64`/`f64` member of
+ * `nvtxCounterLimit_t`; UNDEFINED (0) is the value of a zero-initialized
+ * `limitType`.
+ */
+#define NVTX_COUNTER_LIMIT_UNDEFINED 0
+#define NVTX_COUNTER_LIMIT_I64       1
+#define NVTX_COUNTER_LIMIT_U64       2
+#define NVTX_COUNTER_LIMIT_F64       3
+/**
+ * Union of datatypes that can be used as counter value limits.
+ *
+ * All members are 64 bits wide. Which member is meaningful is not stored in
+ * the union itself; `nvtxSemanticsCounter_t::limitType` specifies it.
+ */
+typedef union
+{
+    int64_t i64;
+    uint64_t u64;
+    double f64;
+} nvtxCounterLimit_t;
+/**
+ * \brief Specify additional properties of a counter or counter group.
+ */
+typedef struct nvtxSemanticsCounter_v1
+{
+    /** Header of the semantic extension (with identifier, version, etc.). */
+    struct nvtxSemanticsHeader_v1 header;
+    /**
+     * Flags that specify whether normalization, scale, limits, etc. should be
+     * applied to counter values. Use the `NVTX_COUNTER_FLAG_*` defines.
+     */
+    uint64_t flags;
+    /** Unit of the counter value (case insensitive) */
+    const char* unit;
+    /** Numerator of the unit scale. Should be 1 if not used. */
+    uint64_t unitScaleNumerator;
+    /** Denominator of the unit scale. Should be 1 if not used. */
+    uint64_t unitScaleDenominator;
+    /**
+     * Specifies the used union member for `min` and `max`.
+     * Use the defines `NVTX_COUNTER_LIMIT_*`.
+     */
+    int64_t limitType;
+    /** Value limits (lower bound `min`, upper bound `max`). */
+    nvtxCounterLimit_t min;
+    nvtxCounterLimit_t max;
+} nvtxSemanticsCounter_t;
+#endif /* NVTX_SEMANTIC_ID_COUNTERS_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsScope.h ADDED Viewed

	@@ -0,0 +1,50 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExtPayload.h"
+#ifndef NVTX_SEMANTIC_ID_SCOPE_V1
+#define NVTX_SEMANTIC_ID_SCOPE_V1 1
+/**
+ * \brief Specify the NVTX scope for a payload entry.
+ *
+ * This allows the scope to be set for a specific value or counter in a payload.
+ * The scope must be known at schema registration time.
+ *
+ * The first member is the common semantic extension header
+ * (`nvtxSemanticsHeader_v1`); the identifier of this extension is
+ * `NVTX_SEMANTIC_ID_SCOPE_V1`.
+ */
+typedef struct nvtxSemanticsScope_v1
+{
+    struct nvtxSemanticsHeader_v1 header;
+    /** Specifies the scope of a payload entry, e.g. a counter or timestamp. */
+    uint64_t scopeId;
+} nvtxSemanticsScope_t;
+#endif /* NVTX_SEMANTIC_ID_SCOPE_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSemanticsTime.h ADDED Viewed

	@@ -0,0 +1,49 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExtPayload.h"
+/** Identifier of the semantic extension for timestamps. */
+#ifndef NVTX_SEMANTIC_ID_TIME_V1
+#define NVTX_SEMANTIC_ID_TIME_V1 2
+/* Use with the version field of `nvtxSemanticsHeader_t`. */
+#define NVTX_TIME_SEMANTIC_VERSION 1
+/**
+ * Semantic extension specifying timestamp properties.
+ *
+ * The first member is the common semantic extension header
+ * (`nvtxSemanticsHeader_v1`); the identifier of this extension is
+ * `NVTX_SEMANTIC_ID_TIME_V1` and `NVTX_TIME_SEMANTIC_VERSION` is meant for the
+ * header's version field.
+ */
+typedef struct nvtxSemanticsTime_v1
+{
+    struct nvtxSemanticsHeader_v1 header;
+    /** Time domain ID or predefined `NVTX_TIMESTAMP_TYPE_*`. */
+    uint64_t timeDomainId;
+} nvtxSemanticsTime_t;
+#endif /* NVTX_SEMANTIC_ID_TIME_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvToolsExtSync.h ADDED Viewed

	@@ -0,0 +1,406 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include "nvToolsExt.h"
+#ifndef NVTOOLSEXT_SYNC_V3
+#define NVTOOLSEXT_SYNC_V3
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* \cond SHOW_HIDDEN
+* \version NVTX_VERSION_2
+*/
+#define NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (NVTX_STATIC_CAST(uint16_t, sizeof(nvtxSyncUserAttributes_v0)))
+/** \endcond */
+/**
+* \page PAGE_SYNCHRONIZATION Synchronization
+*
+* This section covers a subset of the API that allows users to track additional
+* synchronization details of their application.   Naming OS synchronization primitives
+* may allow users to better understand the data collected by traced synchronization
+* APIs.  Additionally, a user defined synchronization object can allow the users
+* to tell the tools when the user is building their own synchronization system
+* that does not rely on the OS to provide behaviors and instead uses techniques like
+* atomic operations and spinlocks.
+*
+* See module \ref SYNCHRONIZATION for details.
+*
+* \par Example
+* Instrument a mutex class:
+* \code
+* class MyMutex
+* {
+*     volatile long bLocked;
+*     nvtxSyncUser_t hSync;
+* public:
+*     MyMutex(const char* name, nvtxDomainHandle_t d) {
+*          bLocked = 0;
+*
+*          nvtxSyncUserAttributes_t attribs = { 0 };
+*          attribs.version = NVTX_VERSION;
+*          attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
+*          attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
+*          attribs.message.ascii = name;
+*          hSync = nvtxDomainSyncUserCreate(d, &attribs);
+*     }
+*
+*     ~MyMutex() {
+*          nvtxDomainSyncUserDestroy(hSync);
+*     }
+*
+*     bool Lock() {
+*          nvtxDomainSyncUserAcquireStart(hSync);
+*          bool acquired = __sync_bool_compare_and_swap(&bLocked, 0, 1); // atomic compiler intrinsic
+*
+*          if (acquired) {
+*              nvtxDomainSyncUserAcquireSuccess(hSync);
+*          }
+*          else {
+*              nvtxDomainSyncUserAcquireFailed(hSync);
+*          }
+*          return acquired;
+*     }
+*
+*     void Unlock() {
+*          nvtxDomainSyncUserReleasing(hSync);
+*          bLocked = false;
+*     }
+* };
+* \endcode
+*
+* \version NVTX_VERSION_2
+*/
+/*  ------------------------------------------------------------------------- */
+/* \cond SHOW_HIDDEN
+* \brief Used to build a non-colliding value for resource types separated by class
+* \version NVTX_VERSION_2
+*/
+#define NVTX_RESOURCE_CLASS_SYNC_OS 2 /**< Synchronization objects that are OS specific. */
+#define NVTX_RESOURCE_CLASS_SYNC_PTHREAD 3 /**< Synchronization objects that are from the POSIX Threads API (pthread)*/
+/** \endcond */
+/*  ------------------------------------------------------------------------- */
+/** \defgroup SYNCHRONIZATION Synchronization
+* See page \ref PAGE_SYNCHRONIZATION.
+* @{
+*/
+/** \brief Resource type values for OSs with POSIX Thread API support
+ *
+ * Each value combines the SYNC_PTHREAD resource class with an index (1-6) via
+ * NVTX_RESOURCE_MAKE_TYPE; the trailing comment names the matching pthread type.
+ */
+typedef enum nvtxResourceSyncPosixThreadType_t
+{
+    NVTX_RESOURCE_TYPE_SYNC_PTHREAD_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 1), /* pthread_mutex_t  */
+    NVTX_RESOURCE_TYPE_SYNC_PTHREAD_CONDITION = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 2), /* pthread_cond_t  */
+    NVTX_RESOURCE_TYPE_SYNC_PTHREAD_RWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 3), /* pthread_rwlock_t  */
+    NVTX_RESOURCE_TYPE_SYNC_PTHREAD_BARRIER = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 4), /* pthread_barrier_t  */
+    NVTX_RESOURCE_TYPE_SYNC_PTHREAD_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 5), /* pthread_spinlock_t  */
+    NVTX_RESOURCE_TYPE_SYNC_PTHREAD_ONCE = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 6) /* pthread_once_t  */
+} nvtxResourceSyncPosixThreadType_t;
+/** \brief Resource type values for Windows OSs
+*
+* All values use the SYNC_OS resource class with indices 1-5. The same
+* class/index pairs are also used by ::nvtxResourceSyncLinuxType_t, so a value
+* is only meaningful together with the OS it was recorded on.
+*/
+typedef enum nvtxResourceSyncWindowsType_t
+{
+    NVTX_RESOURCE_TYPE_SYNC_WINDOWS_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
+    NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
+    NVTX_RESOURCE_TYPE_SYNC_WINDOWS_EVENT = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
+    NVTX_RESOURCE_TYPE_SYNC_WINDOWS_CRITICAL_SECTION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
+    NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SRWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5)
+} nvtxResourceSyncWindowsType_t;
+/** \brief Resource type values for Linux and Linux derived OSs such as Android
+*
+* All values use the SYNC_OS resource class with indices 1-7. The same
+* class/index pairs are also used by ::nvtxResourceSyncWindowsType_t, so a
+* value is only meaningful together with the OS it was recorded on.
+*
+* \sa
+* ::nvtxResourceSyncPosixThreadType_t
+*/
+typedef enum nvtxResourceSyncLinuxType_t
+{
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_FUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_COMPLETION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5),
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_SEQLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 6),
+    NVTX_RESOURCE_TYPE_SYNC_LINUX_RCU = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 7)
+} nvtxResourceSyncLinuxType_t;
+/** \brief Resource type values for Android come from Linux.
+*
+* This is a plain type alias of ::nvtxResourceSyncLinuxType_t; it introduces no
+* additional enumerators.
+*
+* \sa
+* ::nvtxResourceSyncLinuxType_t
+* ::nvtxResourceSyncPosixThreadType_t
+*/
+typedef enum nvtxResourceSyncLinuxType_t nvtxResourceSyncAndroidType_t;
+/** \brief User Defined Synchronization Object Handle.
+* \anchor SYNCUSER_HANDLE_STRUCTURE
+*
+* This structure is opaque to the user and is used as a handle to reference
+* a user defined synchronization object.  The tools will return a pointer through the API for the application
+* to hold on its behalf to reference the synchronization object in the future.
+*
+*/
+typedef struct nvtxSyncUser* nvtxSyncUser_t;
+/** \brief User Defined Synchronization Object Attributes Structure.
+* \anchor USERDEF_SYNC_ATTRIBUTES_STRUCTURE
+*
+* This structure is used to describe the attributes of a user defined synchronization
+* object.  The layout of the structure is defined by a specific version of the tools
+* extension library and can change between different versions of the Tools Extension
+* library.
+*
+* \par Guidelines
+* The caller should always perform the following three tasks when using
+* attributes:
+* <ul>
+*    <li>Zero the structure
+*    <li>Set the version field
+*    <li>Set the size field
+* </ul>
+*
+* Zeroing the structure sets all the event attributes types and values
+* to the default value.
+*
+* The version and size field are used by the Tools Extension
+* implementation to handle multiple versions of the attributes structure.
+*
+* It is recommended that the caller use one of the following two methods
+* to initialize the sync user attributes structure:
+*
+* \par Method 1
+* Initializing nvtxSyncUserAttributes_t for future compatibility:
+* \code
+* nvtxSyncUserAttributes_t attribs = {0};
+* attribs.version = NVTX_VERSION;
+* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
+* \endcode
+*
+* \par Method 2
+* Initializing nvtxSyncUserAttributes_t for a specific version:
+* \code
+* nvtxSyncUserAttributes_t attribs = {0};
+* attribs.version = 1;
+* attribs.size = (uint16_t)(sizeof(nvtxSyncUserAttributes_t));
+* \endcode
+*
+* If the caller uses Method 1 it is critical that the entire binary
+* layout of the structure be configured to 0 so that all fields
+* are initialized to the default value.
+*
+* The caller should either use both NVTX_VERSION and
+* NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
+* and a versioned type (Method 2).  Using a mix of the two methods
+* will likely cause either source level incompatibility or binary
+* incompatibility in the future.
+*
+* \par Example
+* Populate a sync attributes structure:
+* \code
+* // Initialize
+* nvtxSyncUserAttributes_t attribs = {0};
+* attribs.version = NVTX_VERSION;
+* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
+*
+* // Configure the Attributes
+* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
+* attribs.message.ascii = "Example";
+* \endcode
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+*/
+typedef struct nvtxSyncUserAttributes_v0
+{
+    /**
+    * \brief Version flag of the structure.
+    *
+    * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
+    * supported in this header file. This can optionally be overridden to
+    * another version of the tools extension library.
+    */
+    uint16_t version;
+    /**
+    * \brief Size of the structure.
+    *
+    * Needs to be set to the size in bytes of this attribute structure
+    * (for example NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE).
+    */
+    uint16_t size;
+    /** \brief Message type specified in this attribute structure.
+    *
+    * Defines the message format of the attribute structure's \ref nvtxSyncUserAttributes_v0::message
+    * "message" field.
+    *
+    * Default Value is NVTX_MESSAGE_UNKNOWN
+    */
+    int32_t messageType;            /* holds an nvtxMessageType_t value */
+    /** \brief Message assigned to this attribute structure.
+    *
+    * The text message attached to the synchronization object that these
+    * attributes describe.
+    */
+    nvtxMessageValue_t message;
+} nvtxSyncUserAttributes_v0;
+typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
+/* ------------------------------------------------------------------------- */
+/** \brief Create a user defined synchronization object.
+* This is used to track non-OS synchronization working with spinlocks and atomics.
+*
+* \param domain - Domain to own the resource.
+* \param attribs - A structure to assign multiple attributes to the object.
+*
+* \return A handle that represents the newly created user defined synchronization object.
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+* ::nvtxDomainSyncUserDestroy
+* ::nvtxDomainSyncUserAcquireStart
+* ::nvtxDomainSyncUserAcquireFailed
+* ::nvtxDomainSyncUserAcquireSuccess
+* ::nvtxDomainSyncUserReleasing
+*
+* \version NVTX_VERSION_2
+*/
+NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
+/* ------------------------------------------------------------------------- */
+/** \brief Destroy a user defined synchronization object.
+* This is used to track non-OS synchronization working with spinlocks and atomics.
+*
+* \param handle - A handle to the object to operate on.
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+* ::nvtxDomainSyncUserDestroy
+* ::nvtxDomainSyncUserAcquireStart
+* ::nvtxDomainSyncUserAcquireFailed
+* ::nvtxDomainSyncUserAcquireSuccess
+* ::nvtxDomainSyncUserReleasing
+*
+* \version NVTX_VERSION_2
+*/
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle);
+/* ------------------------------------------------------------------------- */
+/** \brief Signal to tools that an attempt to acquire a user defined synchronization object is starting.
+*
+* \param handle - A handle to the object to operate on.
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+* ::nvtxDomainSyncUserDestroy
+* ::nvtxDomainSyncUserAcquireStart
+* ::nvtxDomainSyncUserAcquireFailed
+* ::nvtxDomainSyncUserAcquireSuccess
+* ::nvtxDomainSyncUserReleasing
+*
+* \version NVTX_VERSION_2
+*/
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle);
+/* ------------------------------------------------------------------------- */
+/** \brief Signal to tools of failure in acquiring a user defined synchronization object.
+* This should be called after \ref nvtxDomainSyncUserAcquireStart.
+*
+* \param handle - A handle to the object to operate on.
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+* ::nvtxDomainSyncUserDestroy
+* ::nvtxDomainSyncUserAcquireStart
+* ::nvtxDomainSyncUserAcquireFailed
+* ::nvtxDomainSyncUserAcquireSuccess
+* ::nvtxDomainSyncUserReleasing
+*
+* \version NVTX_VERSION_2
+*/
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle);
+/* ------------------------------------------------------------------------- */
+/** \brief Signal to tools of success in acquiring a user defined synchronization object.
+* This should be called after \ref nvtxDomainSyncUserAcquireStart.
+*
+* \param handle - A handle to the object to operate on.
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+* ::nvtxDomainSyncUserDestroy
+* ::nvtxDomainSyncUserAcquireStart
+* ::nvtxDomainSyncUserAcquireFailed
+* ::nvtxDomainSyncUserAcquireSuccess
+* ::nvtxDomainSyncUserReleasing
+*
+* \version NVTX_VERSION_2
+*/
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle);
+/* ------------------------------------------------------------------------- */
+/** \brief Signal to tools of releasing a reservation on a user defined synchronization object.
+* This should be called after \ref nvtxDomainSyncUserAcquireSuccess.
+*
+* \param handle - A handle to the object to operate on.
+*
+* \sa
+* ::nvtxDomainSyncUserCreate
+* ::nvtxDomainSyncUserDestroy
+* ::nvtxDomainSyncUserAcquireStart
+* ::nvtxDomainSyncUserAcquireFailed
+* ::nvtxDomainSyncUserAcquireSuccess
+* ::nvtxDomainSyncUserReleasing
+*
+* \version NVTX_VERSION_2
+*/
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle);
+/** @} */ /*END defgroup*/
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#ifndef NVTX_NO_IMPL
+#define NVTX_IMPL_GUARD_SYNC /* Ensure other headers cannot be included directly */
+#include "nvtxDetail/nvtxImplSync_v3.h"
+#undef NVTX_IMPL_GUARD_SYNC
+#endif /*NVTX_NO_IMPL*/
+#endif /* NVTOOLSEXT_SYNC_V3 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtx3.hpp ADDED Viewed

The diff for this file is too large to render. See raw diff

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtHelperMacros.h ADDED Viewed

	@@ -0,0 +1,64 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifndef NVTX_EXT_HELPER_MACROS_H
+#define NVTX_EXT_HELPER_MACROS_H
+#if !defined(NVTX_NULLPTR) /* Keep an already existing definition. */
+#if defined(__cplusplus) && __cplusplus >= 201103L
+#define NVTX_NULLPTR nullptr /* C++11 and later */
+#else
+#define NVTX_NULLPTR NULL /* C and pre-C++11 */
+#endif
+#endif
+/* Combine tokens. The two-level definition lets macro arguments be expanded
+ * before they are pasted together.
+ */
+#define _NVTX_EXT_CONCAT(a, b) a##b
+#define NVTX_EXT_CONCAT(a, b) _NVTX_EXT_CONCAT(a, b)
+/* Resolves to the number of arguments passed (1 to 15): the arguments push the
+ * descending number list to the right and NVTX_EXT_SELECTA16 picks whatever
+ * lands in its 16th position.
+ * NOTE(review): an empty argument list still occupies the first position, so
+ * the result is 1 rather than 0 -- confirm no caller relies on a zero count.
+ */
+#define NVTX_EXT_NUM_ARGS(...) \
+    NVTX_EXT_SELECTA16(__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, throwaway)
+#define NVTX_EXT_SELECTA16(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, ...) a16
+/* Cast argument(s) to void to prevent unused variable warnings.
+ * Helpers are defined for zero to six arguments only.
+ */
+#define _NVTX_EXT_VOIDIFY0()
+#define _NVTX_EXT_VOIDIFY1(a1) (void)a1;
+#define _NVTX_EXT_VOIDIFY2(a1, a2) (void)a1; (void)a2;
+#define _NVTX_EXT_VOIDIFY3(a1, a2, a3) (void)a1; (void)a2; (void)a3;
+#define _NVTX_EXT_VOIDIFY4(a1, a2, a3, a4) (void)a1; (void)a2; (void)a3; (void)a4;
+#define _NVTX_EXT_VOIDIFY5(a1, a2, a3, a4, a5) (void)a1; (void)a2; (void)a3; (void)a4; (void)a5;
+#define _NVTX_EXT_VOIDIFY6(a1, a2, a3, a4, a5, a6) (void)a1; (void)a2; (void)a3; (void)a4; (void)a5; (void)a6;
+/* Mark function arguments as unused. Selects the _NVTX_EXT_VOIDIFY<N> helper
+ * that matches the argument count, so at most six arguments are supported.
+ * The expansion already ends with a semicolon.
+ */
+#define NVTX_EXT_HELPER_UNUSED_ARGS(...) \
+    NVTX_EXT_CONCAT(_NVTX_EXT_VOIDIFY, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+#endif /* NVTX_EXT_HELPER_MACROS_H */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImpl.h ADDED Viewed

	@@ -0,0 +1,123 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_IMPL_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifndef NVTX_EXT_IMPL_H
+#define NVTX_EXT_IMPL_H
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+/* ---- Include required platform headers ---- */
+#if defined(_WIN32)
+#include <windows.h>
+#else
+#include <unistd.h>
+#if defined(__ANDROID__)
+#include <android/api-level.h>
+#endif
+#if defined(__linux__) || defined(__CYGWIN__)
+#include <sched.h>
+#endif
+#include <sys/types.h>
+#include <limits.h>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <pthread.h>
+#endif
+/* ---- Define macros used in this file ----
+ * NVTX_ERR and NVTX_INFO take printf-style arguments. They expand to nothing
+ * unless NVTX_DEBUG_PRINT is defined; with it they log through the Android
+ * log on Android and through stderr elsewhere.
+ * NOTE(review): the Android definitions end with a semicolon while the
+ * stderr-based ones do not -- confirm all call sites tolerate both forms.
+ */
+#ifdef NVTX_DEBUG_PRINT
+#ifdef __ANDROID__
+#include <android/log.h>
+#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__);
+#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__);
+#else
+#include <stdio.h>
+#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
+#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
+#endif
+#else /* !defined(NVTX_DEBUG_PRINT) */
+#define NVTX_ERR(...)
+#define NVTX_INFO(...)
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifdef __GNUC__
+#pragma GCC visibility push(hidden)
+#endif
+#define NVTX_EXTENSION_FRESH 0    /* Uninitialized extension or function slot (matches zero-initialized storage). */
+#define NVTX_EXTENSION_DISABLED 1 /* Disabled extension or function slot. */
+#define NVTX_EXTENSION_STARTING 2 /* Extension is being initialized. */
+#define NVTX_EXTENSION_LOADED 3   /* Extension is initialized successfully. */
+#define NVTX_EXTENSION_INIT_FN_FAILED 4 /* Extension init function returned failure. */
+/* Function slots are local to each extension.
+ * The globals defined here only hold the injection initialization function
+ * pointer, which starts out as NVTX_NULLPTR.
+ */
+typedef struct nvtxExtGlobals1_t
+{
+    NvtxExtInitializeInjectionFunc_t injectionFnPtr;
+} nvtxExtGlobals1_t;
+NVTX_LINKONCE_DEFINE_GLOBAL nvtxExtGlobals1_t NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1) =
+{
+    NVTX_NULLPTR
+};
+#define NVTX_EXT_INIT_GUARD
+#include "nvtxExtInit.h"
+#undef NVTX_EXT_INIT_GUARD
+#ifdef __GNUC__
+#pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#endif /* NVTX_EXT_IMPL_H */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplCounters_v1.h ADDED Viewed

	@@ -0,0 +1,166 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_IMPL_COUNTERS_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExtCounters.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#define NVTX_EXT_IMPL_GUARD
+#include "nvtxExtImpl.h"
+#undef NVTX_EXT_IMPL_GUARD
+#ifndef NVTX_EXT_IMPL_COUNTERS_V1
+#define NVTX_EXT_IMPL_COUNTERS_V1
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifdef NVTX_DISABLE
+#include "nvtxExtHelperMacros.h"
+#define NVTX_EXT_COUNTERS_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
+NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    NVTX_EXT_HELPER_UNUSED_ARGS arg_names \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) \
+} /* NVTX_DISABLE variant: a stub that only voids its arguments and returns the invalid value. */
+#else /* NVTX_DISABLE */
+/*
+ * Function slots for the counters extension. First entry is the module state,
+ * initialized to `0` (`NVTX_EXTENSION_FRESH`). It is followed by
+ * `NVTX_EXT_COUNTERS_SLOT_COUNT` function slots, which are zero-initialized
+ * as well.
+ */
+#define NVTX_EXT_COUNTERS_SLOT_COUNT 63
+NVTX_LINKONCE_DEFINE_GLOBAL intptr_t
+NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots)[NVTX_EXT_COUNTERS_SLOT_COUNT + 1]
+    = {0};
+/*
+ * Builds the module description of the counters extension (a single segment
+ * covering all function slots) and hands it to `nvtxExtInitOnce`.
+ * The forward declaration only avoids warnings about a missing prototype.
+ */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersInitOnce)(void);
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersInitOnce)(void)
+{
+    intptr_t* const moduleSlots = NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots);
+    nvtxExtModuleSegment_t slotSegment = {
+        0, /* unused (only one segment) */
+        NVTX_EXT_COUNTERS_SLOT_COUNT,
+        NVTX_NULLPTR /* function slots, assigned below */
+    };
+    nvtxExtModuleInfo_t moduleInfo = {
+        NVTX_VERSION, sizeof(nvtxExtModuleInfo_t),
+        NVTX_EXT_COUNTERS_MODULEID, NVTX_EXT_COUNTERS_COMPATID,
+        1, NVTX_NULLPTR, /* number of segments, segments (assigned below) */
+        NVTX_NULLPTR, /* no export function needed */
+        NVTX_NULLPTR /* no extension private info */
+    };
+    /* Element 0 is the module state; the function slots start at element 1. */
+    slotSegment.functionSlots = &moduleSlots[1];
+    moduleInfo.segments = &slotSegment;
+    NVTX_INFO( "%s\n", __FUNCTION__  );
+    NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&moduleInfo, moduleSlots);
+}
+#define NVTX_EXT_COUNTERS_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
+typedef ret_type (*fn_name##_impl_fntype)signature; \
+NVTX_DECLSPEC ret_type NVTX_API fn_name signature { /* API entry point that dispatches through its function slot. */ \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    intptr_t* pSlot = &NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersSlots)[NVTX3EXT_CBID_##fn_name + 1]; \
+    intptr_t slot = *pSlot; /* The `+ 1` above skips the module state kept in the first entry. */ \
+    if (slot != NVTX_EXTENSION_DISABLED) { \
+        if (slot != NVTX_EXTENSION_FRESH) { /* Slot holds a function pointer: call it. */ \
+            NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
+        } else { /* Slot still untouched: run the extension initialization first. */ \
+            NVTX_EXT_COUNTERS_VERSIONED_ID(nvtxExtCountersInitOnce)(); \
+            /* Re-read function slot after extension initialization. */ \
+            slot = *pSlot; \
+            if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \
+                NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
+            } \
+        } \
+    } \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) /* No tool attached (slot disabled or still fresh). */ \
+}
+#endif /* NVTX_DISABLE */
+/* Non-void functions: the wrapper returns the callee's result, or `0` cast to
+ * the return type when it reaches NVTX_EXT_FN_RETURN_INVALID.
+ */
+#define NVTX_EXT_FN_RETURN return
+#define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_STATIC_CAST(rtype, 0);
+NVTX_EXT_COUNTERS_IMPL_FN_V1(uint64_t, nvtxCounterRegister,
+    (nvtxDomainHandle_t domain, const nvtxCounterAttr_t* attr),
+    (domain, attr))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: Non-void functions. */
+/* void functions: both return helpers expand to nothing, so the wrapper only
+ * forwards the call.
+ */
+#define NVTX_EXT_FN_RETURN
+#define NVTX_EXT_FN_RETURN_INVALID(rtype)
+NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSampleInt64,
+    (nvtxDomainHandle_t domain, uint64_t counterId, int64_t value),
+    (domain, counterId, value))
+NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSampleFloat64,
+    (nvtxDomainHandle_t domain, uint64_t counterId, double value),
+    (domain, counterId, value))
+NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSample,
+    (nvtxDomainHandle_t domain, uint64_t counterId, const void* values, size_t size),
+    (domain, counterId, values, size))
+NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterSampleNoValue,
+    (nvtxDomainHandle_t domain, uint64_t counterId, uint8_t reason),
+    (domain, counterId, reason))
+NVTX_EXT_COUNTERS_IMPL_FN_V1(void, nvtxCounterBatchSubmit,
+    (nvtxDomainHandle_t domain, const nvtxCounterBatch_t* counterData),
+    (domain, counterData))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: void functions. */
+/* Keep NVTX_EXT_COUNTERS_IMPL_FN_V1 defined for a future version of this extension. */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#endif /* NVTX_EXT_IMPL_COUNTERS_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMemCudaRt_v1.h ADDED Viewed

	@@ -0,0 +1,72 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_IMPL_MEM_CUDART_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExtMemCudaRt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifndef NVTX_EXT_IMPL_MEM_CUDART_V1
+#define NVTX_EXT_IMPL_MEM_CUDART_V1
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* NVTX_EXT_MEM_IMPL_FN_V1 defined in nvtxExtImplMem_v1.h */
+/* Non-void functions: the wrapper returns the callee's result, or
+ * NVTX_NULLPTR when it reaches NVTX_EXT_FN_RETURN_INVALID.
+ */
+#define NVTX_EXT_FN_RETURN return
+#define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_NULLPTR;
+NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemPermissionsHandle_t, nvtxMemCudaGetProcessWidePermissions, (nvtxDomainHandle_t domain), (domain))
+NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemPermissionsHandle_t, nvtxMemCudaGetDeviceWidePermissions, (nvtxDomainHandle_t domain, int device), (domain, device))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: Non-void functions. */
+/* void functions: both return helpers expand to nothing, so the wrapper only
+ * forwards the call.
+ */
+#define NVTX_EXT_FN_RETURN
+#define NVTX_EXT_FN_RETURN_INVALID(rtype)
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemCudaSetPeerAccess, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions, int devicePeer, uint32_t flags), (domain, permissions, devicePeer, flags))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemCudaMarkInitialized, (nvtxDomainHandle_t domain, cudaStream_t stream, uint8_t isPerThreadStream, nvtxMemMarkInitializedBatch_t const* desc), (domain, stream, isPerThreadStream, desc))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: void functions. */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#endif /* NVTX_EXT_IMPL_MEM_CUDART_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplMem_v1.h ADDED Viewed

	@@ -0,0 +1,168 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_IMPL_MEM_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExtMem.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#define NVTX_EXT_IMPL_GUARD
+#include "nvtxExtImpl.h"
+#undef NVTX_EXT_IMPL_GUARD
+#ifndef NVTX_EXT_IMPL_MEM_V1
+#define NVTX_EXT_IMPL_MEM_V1
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifdef NVTX_DISABLE
+#include "nvtxExtHelperMacros.h"
+#define NVTX_EXT_MEM_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
+NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    NVTX_EXT_HELPER_UNUSED_ARGS arg_names \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) \
+} /* NVTX_DISABLE variant: a stub that only voids its arguments and returns the invalid value. */
+#else /* NVTX_DISABLE */
+/*
+ * Function slots for the memory extension. First entry is the module
+ * state, initialized to `0` (`NVTX_EXTENSION_FRESH`). It is followed by
+ * `NVTX_EXT_MEM_SLOT_COUNT` function slots, which are zero-initialized
+ * as well.
+ */
+#define NVTX_EXT_MEM_SLOT_COUNT 63
+NVTX_LINKONCE_DEFINE_GLOBAL intptr_t
+NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots)[NVTX_EXT_MEM_SLOT_COUNT + 1]
+    = {0};
+/*
+ * Builds the module description of the memory extension (a single segment
+ * covering all function slots) and hands it to `nvtxExtInitOnce`.
+ * The forward declaration only avoids warnings about a missing prototype.
+ */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemInitOnce)(void);
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemInitOnce)(void)
+{
+    intptr_t* const moduleSlots = NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots);
+    nvtxExtModuleSegment_t slotSegment = {
+        1, /* only one segment, hard-code ID */
+        NVTX_EXT_MEM_SLOT_COUNT,
+        NVTX_NULLPTR /* function slots, assigned below */
+    };
+    nvtxExtModuleInfo_t moduleInfo = {
+        NVTX_VERSION, sizeof(nvtxExtModuleInfo_t),
+        NVTX_EXT_MODULEID_MEM, NVTX_EXT_COMPATID_MEM,
+        1, NVTX_NULLPTR, /* number of segments, segments (assigned below) */
+        NVTX_NULLPTR, /* no export function needed */
+        NVTX_NULLPTR /* no extension private info */
+    };
+    /* Element 0 is the module state; the function slots start at element 1. */
+    slotSegment.functionSlots = &moduleSlots[1];
+    moduleInfo.segments = &slotSegment;
+    NVTX_INFO( "%s\n", __FUNCTION__  );
+    NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&moduleInfo, moduleSlots);
+}
+#define NVTX_EXT_MEM_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
+typedef ret_type (*fn_name##_impl_fntype)signature; \
+NVTX_DECLSPEC ret_type NVTX_API fn_name signature { /* API entry point that dispatches through its function slot. */ \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    intptr_t* pSlot = &NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemSlots)[NVTX3EXT_CBID_##fn_name + 1]; \
+    intptr_t slot = *pSlot; /* The `+ 1` above skips the module state kept in the first entry. */ \
+    if (slot != NVTX_EXTENSION_DISABLED) { \
+        if (slot != NVTX_EXTENSION_FRESH) { /* Slot holds a function pointer: call it. */ \
+            NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
+        } else { /* Slot still untouched: run the extension initialization first. */ \
+            NVTX_EXT_MEM_VERSIONED_ID(nvtxExtMemInitOnce)(); \
+            /* Re-read function slot after extension initialization. */ \
+            slot = *pSlot; \
+            if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \
+                NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
+            } \
+        } \
+    } \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) /* No tool attached (slot disabled or still fresh). */ \
+}
+#endif /* NVTX_DISABLE */
+/* Non-void functions: the wrapper returns the callee's result, or
+ * NVTX_NULLPTR when it reaches NVTX_EXT_FN_RETURN_INVALID.
+ */
+#define NVTX_EXT_FN_RETURN return
+#define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_NULLPTR;
+NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemHeapHandle_t, nvtxMemHeapRegister, (nvtxDomainHandle_t domain, nvtxMemHeapDesc_t const* desc), (domain, desc))
+NVTX_EXT_MEM_IMPL_FN_V1(nvtxMemPermissionsHandle_t, nvtxMemPermissionsCreate, (nvtxDomainHandle_t domain, int32_t creationflags), (domain, creationflags))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: Non-void functions. */
+/* void functions: both return helpers expand to nothing, so the wrapper only
+ * forwards the call.
+ */
+#define NVTX_EXT_FN_RETURN
+#define NVTX_EXT_FN_RETURN_INVALID(rtype)
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemHeapUnregister, (nvtxDomainHandle_t domain, nvtxMemHeapHandle_t heap), (domain, heap))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemHeapReset, (nvtxDomainHandle_t domain, nvtxMemHeapHandle_t heap), (domain, heap))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsRegister, (nvtxDomainHandle_t domain, nvtxMemRegionsRegisterBatch_t const* desc), (domain, desc))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsResize, (nvtxDomainHandle_t domain, nvtxMemRegionsResizeBatch_t const* desc), (domain, desc))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsUnregister, (nvtxDomainHandle_t domain, nvtxMemRegionsUnregisterBatch_t const* desc), (domain, desc))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemRegionsName, (nvtxDomainHandle_t domain, nvtxMemRegionsNameBatch_t const* desc), (domain, desc))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsAssign, (nvtxDomainHandle_t domain, nvtxMemPermissionsAssignBatch_t const* desc), (domain, desc))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsDestroy, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions), (domain, permissions))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsReset, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions), (domain, permissions))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsBind, (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions, uint32_t bindScope, uint32_t bindFlags), (domain, permissions, bindScope, bindFlags))
+NVTX_EXT_MEM_IMPL_FN_V1(void, nvtxMemPermissionsUnbind, (nvtxDomainHandle_t domain, uint32_t bindScope), (domain, bindScope))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: void functions. */
+/* Keep NVTX_EXT_MEM_IMPL_FN_V1 defined for a future version of this extension. */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#endif /* NVTX_EXT_IMPL_MEM_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtImplPayload_v1.h ADDED Viewed

	@@ -0,0 +1,265 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_IMPL_PAYLOAD_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExtPayload.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#define NVTX_EXT_IMPL_GUARD
+#include "nvtxExtImpl.h"
+#undef NVTX_EXT_IMPL_GUARD
+#ifndef NVTX_EXT_IMPL_PAYLOAD_V1
+#define NVTX_EXT_IMPL_PAYLOAD_V1
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifdef NVTX_DISABLE
+#include "nvtxExtHelperMacros.h"
+#define NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
+NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    NVTX_EXT_HELPER_UNUSED_ARGS arg_names \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) \
+} /* NVTX_DISABLE variant: a stub that only voids its arguments and returns the invalid value. */
+#define NVTX_EXT_PAYLOAD_IMPL_FN_NOARGS_V1(ret_type, fn_name) \
+NVTX_DECLSPEC ret_type NVTX_API fn_name (void) { \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) \
+} /* NVTX_DISABLE variant for functions without parameters: a stub that only returns the invalid value. */
+#else /* NVTX_DISABLE */
+#include "nvtxExtPayloadTypeInfo.h"
+/*
+ * Function slots for the payload extension. First entry is the module state,
+ * initialized to `0` (`NVTX_EXTENSION_FRESH`). It is followed by
+ * `NVTX_EXT_PAYLOAD_SLOT_COUNT` function slots, which are zero-initialized
+ * as well.
+ */
+#define NVTX_EXT_PAYLOAD_SLOT_COUNT 63
+NVTX_LINKONCE_DEFINE_GLOBAL intptr_t
+NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX_EXT_PAYLOAD_SLOT_COUNT + 1]
+    = {0};
+/* `NVTX_LINKONCE_FWDDECL_FUNCTION` is used to avoid warnings about missing prototypes. */
+/* This helper always returns `1` as `uint8_t`. */
+NVTX_LINKONCE_FWDDECL_FUNCTION uint8_t NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxReturnOne)(void);
+NVTX_LINKONCE_DEFINE_FUNCTION uint8_t NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxReturnOne)(void)
+{
+    return NVTX_STATIC_CAST(uint8_t, 1);
+}
+/*
+ * Fallback for `nvtxDomainIsEnabled`: when its slot was left disabled, point
+ * it at the helper that returns `1` (enabled), unless the module state shows
+ * that no usable tool is attached.
+ */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxInitIsDomainEnabledFn)(void);
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxInitIsDomainEnabledFn)(void)
+{
+    /* Element 0 of the slot table is the module state. */
+    intptr_t* slots = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots);
+    intptr_t* isEnabledSlot = &slots[NVTX3EXT_CBID_nvtxDomainIsEnabled + 1];
+    /* The initialization disables all slots that have not been set by the tool;
+       any other slot value is left alone. */
+    if (*isEnabledSlot != NVTX_EXTENSION_DISABLED)
+    {
+        return;
+    }
+    /* A disabled module always gets the fallback. Otherwise it is installed only
+       if an extension initialization function is set and did not fail. */
+    if (slots[0] != NVTX_EXTENSION_DISABLED)
+    {
+        if (NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr == NVTX_NULLPTR ||
+            slots[0] == NVTX_EXTENSION_INIT_FN_FAILED)
+        {
+            return;
+        }
+    }
+    *isEnabledSlot = NVTX_REINTERPRET_CAST(intptr_t, NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxReturnOne));
+}
+/*
+ * Initialization entry point of the payload extension: describes the slot
+ * table to `nvtxExtInitOnce` and afterwards installs the fallback for
+ * `nvtxDomainIsEnabled`.
+ */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(void);
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(void)
+{
+    /* Entry 0 is the module state, function slots follow. */
+    intptr_t* const slotTable = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots);
+    /* Pointer members start out null and are filled in after the initializers. */
+    nvtxExtModuleSegment_t slotSegment = {
+        0, /* unused (only one segment) */
+        NVTX_EXT_PAYLOAD_SLOT_COUNT,
+        NVTX_NULLPTR /* function slots */
+    };
+    nvtxExtModuleInfo_t moduleInfo = {
+        NVTX_VERSION, sizeof(nvtxExtModuleInfo_t),
+        NVTX_EXT_PAYLOAD_MODULEID, NVTX_EXT_PAYLOAD_COMPATID,
+        1, NVTX_NULLPTR, /* number of segments, segments */
+        NVTX_NULLPTR, /* no export function needed */
+        /* bake type sizes and alignment information into program binary */
+        &(NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadTypeInfo))
+    };
+    moduleInfo.segments = &slotSegment;
+    slotSegment.functionSlots = slotTable + 1;
+    NVTX_INFO( "%s\n", __FUNCTION__  );
+    /* The slot table's first entry doubles as the module state. */
+    NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&moduleInfo, slotTable);
+    NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxInitIsDomainEnabledFn)();
+}
+/*
+ * Generates the API function `fn_name` (and a matching function pointer
+ * typedef `fn_name##_impl_fntype`).
+ *
+ * The generated function reads its slot at index `NVTX3EXT_CBID_<fn_name> + 1`:
+ *  - slot is neither DISABLED nor FRESH: the slot value is cast to the
+ *    function pointer type and called with `arg_names`;
+ *  - slot is FRESH: `nvtxExtPayloadInitOnce` runs, the slot is read again and
+ *    called if it now holds a function;
+ *  - otherwise `NVTX_EXT_FN_RETURN_INVALID(ret_type)` is expanded.
+ *
+ * `NVTX_EXT_FN_RETURN` and `NVTX_EXT_FN_RETURN_INVALID` must be defined at
+ * the point of instantiation.
+ */
+#define NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \
+typedef ret_type (*fn_name##_impl_fntype)signature; \
+NVTX_DECLSPEC ret_type NVTX_API fn_name signature { \
+    NVTX_SET_NAME_MANGLING_OPTIONS \
+    intptr_t* pSlot = &NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_##fn_name + 1]; \
+    intptr_t slot = *pSlot; \
+    if (slot != NVTX_EXTENSION_DISABLED) { \
+        if (slot != NVTX_EXTENSION_FRESH) { \
+            NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
+        } else { \
+            NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(); \
+            /* Re-read function slot after extension initialization. */ \
+            slot = *pSlot; \
+            if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \
+                NVTX_EXT_FN_RETURN (*NVTX_REINTERPRET_CAST(fn_name##_impl_fntype, slot)) arg_names; \
+            } \
+        } \
+    } \
+    NVTX_EXT_FN_RETURN_INVALID(ret_type) /* No tool attached. */ \
+}
+/* Variant for functions without parameters: `(void)` signature, empty argument list. */
+#define NVTX_EXT_PAYLOAD_IMPL_FN_NOARGS_V1(ret_type, fn_name) \
+    NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, (void), ())
+#endif /* NVTX_DISABLE */
+/* Push/pop functions return `NVTX_NO_PUSH_POP_TRACKING` if no tool is attached. */
+/* `NVTX_EXT_FN_RETURN` prefixes the call into the tool, so the tool's result is
+   returned; `NVTX_EXT_FN_RETURN_INVALID` supplies the fallback return statement.
+   Both are undefined again after this group. */
+#define NVTX_EXT_FN_RETURN return
+#define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_NO_PUSH_POP_TRACKING;
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(int, nvtxRangePushPayload,
+    (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count),
+    (domain, payloadData, count))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(int, nvtxRangePopPayload,
+    (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count),
+    (domain, payloadData, count))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* Non-void functions. */
+/* The fallback return value is `0`, cast to the function's return type. */
+#define NVTX_EXT_FN_RETURN return
+#define NVTX_EXT_FN_RETURN_INVALID(rtype) return NVTX_STATIC_CAST(rtype, 0);
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxPayloadSchemaRegister,
+    (nvtxDomainHandle_t domain, const nvtxPayloadSchemaAttr_t* attr),
+    (domain, attr))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxPayloadEnumRegister,
+    (nvtxDomainHandle_t domain, const nvtxPayloadEnumAttr_t* attr),
+    (domain, attr))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(nvtxRangeId_t, nvtxRangeStartPayload,
+    (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count),
+    (domain, payloadData, count))
+/* When the slot table is in use (no NVTX_DISABLE), the slot of
+   `nvtxDomainIsEnabled` may be redirected by `nvtxInitIsDomainEnabledFn`
+   to a helper that returns `1`. */
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint8_t, nvtxDomainIsEnabled, (nvtxDomainHandle_t domain), (domain))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxScopeRegister, (nvtxDomainHandle_t domain,
+    const nvtxScopeAttr_t* attr), (domain, attr))
+NVTX_EXT_PAYLOAD_IMPL_FN_NOARGS_V1(int64_t, nvtxTimestampGet)
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxTimeDomainRegister,
+    (nvtxDomainHandle_t domain, const nvtxTimeDomainAttr_t* attr),
+    (domain, attr))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: Non-void functions. */
+/* void functions. */
+/* Both helper macros expand to nothing here: the call into the tool becomes a
+   plain statement and the generated function simply runs off its end. */
+#define NVTX_EXT_FN_RETURN
+#define NVTX_EXT_FN_RETURN_INVALID(rtype)
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxMarkPayload, (nvtxDomainHandle_t domain,
+    const nvtxPayloadData_t* payloadData, size_t count), (domain, payloadData, count))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxRangeEndPayload, (nvtxDomainHandle_t domain,
+    nvtxRangeId_t id, const nvtxPayloadData_t* payloadData, size_t count),
+    (domain, id, payloadData, count))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimerSource,
+    (nvtxDomainHandle_t domain, uint64_t timeDomainId, uint64_t flags, int64_t (*timestampProviderFn)(void)),
+    (domain, timeDomainId, flags, timestampProviderFn))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimerSourceWithData,
+    (nvtxDomainHandle_t domain, uint64_t timeDomainId, uint64_t flags, int64_t (*timestampProviderFn)(void* data), void* data),
+    (domain, timeDomainId, flags, timestampProviderFn, data))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimeSyncPoint,
+    (nvtxDomainHandle_t domain, uint64_t timeDomainId1, uint64_t timeDomainId2,
+        int64_t timestamp1, int64_t timestamp2),
+    (domain, timeDomainId1, timeDomainId2, timestamp1, timestamp2))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimeSyncPointTable,
+    (nvtxDomainHandle_t domain, uint64_t timeDomainIdSrc, uint64_t timeDomainIdDst,
+    const nvtxSyncPoint_t* syncPoints, size_t count),
+    (domain, timeDomainIdSrc, timeDomainIdDst, syncPoints, count))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxTimestampConversionFactor,
+    (nvtxDomainHandle_t domain, uint64_t timeDomainIdSrc, uint64_t timeDomainIdDst,
+        double slope, int64_t timestampSrc, int64_t timestampDst),
+    (domain, timeDomainIdSrc, timeDomainIdDst, slope, timestampSrc, timestampDst))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxEventSubmit,
+    (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t numPayloads),
+    (domain, payloadData, numPayloads))
+NVTX_EXT_PAYLOAD_IMPL_FN_V1(void, nvtxEventBatchSubmit, (nvtxDomainHandle_t domain,
+    const nvtxEventBatch_t* eventBatch), (domain, eventBatch))
+#undef NVTX_EXT_FN_RETURN
+#undef NVTX_EXT_FN_RETURN_INVALID
+/* END: void functions. */
+/* Keep NVTX_EXT_PAYLOAD_IMPL_FN_V1 defined for a future version of this extension. */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#endif /* NVTX_EXT_IMPL_PAYLOAD_V1 */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtInit.h ADDED Viewed

	@@ -0,0 +1,437 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_INIT_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* ---- Platform-independent helper definitions and functions ---- */
+/* Prefer macros over inline functions to reduce symbol resolution at link time */
+#if defined(_WIN32)
+#define NVTX_ATOMIC_WRITE_PTR(address, value) \
+    InterlockedExchangePointer(NVTX_REINTERPRET_CAST(volatile PVOID*, (address)), \
+        NVTX_REINTERPRET_CAST(PVOID, (value)))
+#define NVTX_ATOMIC_CAS_PTR(old, address, exchange, comparand) \
+    (old) = NVTX_REINTERPRET_CAST(intptr_t, InterlockedCompareExchangePointer( \
+        NVTX_REINTERPRET_CAST(volatile PVOID*, (address)), \
+        NVTX_REINTERPRET_CAST(PVOID, (exchange)), \
+        NVTX_REINTERPRET_CAST(PVOID, (comparand))))
+#elif defined(__GNUC__)
+/* Ensure full memory barrier for atomics, to match Windows functions */
+#define NVTX_ATOMIC_WRITE_PTR(address, value) \
+    __sync_synchronize(); *address = value; __sync_synchronize()
+#define NVTX_ATOMIC_CAS_PTR(old, address, exchange, comparand) \
+    old = __sync_val_compare_and_swap(address, comparand, exchange)
+#else
+#error The library does not support your configuration!
+#endif
+#ifndef NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+/* Define this to 1 for platforms where pre-injected libraries can be discovered. */
+#if defined(_WIN32)
+/* Windows has no process-wide table of dynamic library symbols, so this can't be supported. */
+#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
+#else
+/* POSIX platforms allow calling dlsym on a null module to use the process-wide table.
+ * Note: Still disabled in load sequence version 2.  Needs to support following the
+ * RTLD_NEXT chain, and needs more testing before support can be enabled by default. */
+#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
+#endif
+#endif
+#ifndef NVTX_SUPPORT_ENV_VARS
+/* Define this to 1 for platforms that support environment variables. */
+/* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */
+/* Try:  #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
+#define NVTX_SUPPORT_ENV_VARS 1
+#endif
+#ifndef NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
+/* Define this to 1 for platforms that support dynamic/shared libraries */
+#define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
+#endif
+#ifndef NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
+/* Enabled on Android only: look for the injection library inside the app package
+   when the environment variable does not provide a path. */
+#if defined(__ANDROID__)
+#define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 1
+#else
+#define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 0
+#endif
+#endif
+#ifndef NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
+/* On platforms that support weak symbols (i.e. non-Windows), injection libraries may
+*  be statically linked into an application.  This is useful for platforms where dynamic
+*  injection is not available.  Weak symbols not marked extern are definitions, not just
+*  declarations.  They are guaranteed to be initialized to zero if no normal definitions
+*  are found by the linker to override them.  This means the NVTX load sequence can safely
+*  detect the presence of a static injection -- if InitializeInjectionNvtxExtension_fnptr is zero,
+*  there is no static injection. */
+#if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
+#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
+#else
+#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
+#endif
+#endif
+#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY && !defined(NVTX_STATIC_INJECTION_IMPL)
+/* To make an NVTX injection library support static injection, it must do these things:
+*  - Define InitializeInjectionNvtxExtension_fnptr as a normal symbol (not weak), pointing to
+*    the implementation of InitializeInjectionNvtxExtension (which does not need to be a
+*    dynamic export if only supporting static injection).
+*  - Define NVTX_STATIC_INJECTION_IMPL so the weak definition below is skipped.
+*  - Compile the static injection files with -fPIC if they are to be linked with other
+*    files compiled this way.  If you forget this, GCC will simply tell you to add it.
+*  When building the application, there are a few ways to link in a static injection:
+*  - Compile the injection's source files normally, and include the .o files as inputs
+*    to the linker.
+*  - If the injection is provided as an archive (.a file), it will not resolve any
+*    unresolved symbols, so the linker will skip it by default.  This can be fixed
+*    by wrapping the static injection's name on the linker command line with options
+*    to treat it differently.  For example:
+*      gcc example.o libfoo.a -Wl,--whole-archive libinj-static.a -Wl,--no-whole-archive libbar.a
+*    Note that libinj-static.a is bracketed by options to turn on "whole archive" and
+*    then back off again afterwards, so libfoo.a and libbar.a are linked normally.
+*  - In CMake, a static injection can be added with options like this:
+*      target_link_libraries(app PRIVATE -Wl,--whole-archive inj-static -Wl,--no-whole-archive)
+*/
+/* Weak definition: null unless a statically linked injection provides a normal definition. */
+__attribute__((weak)) NvtxExtInitializeInjectionFunc_t InitializeInjectionNvtxExtension_fnptr;
+#endif
+/* This function tries to find or load an NVTX injection library and get the
+*  address of its InitializeInjectionNvtxExtension function.  The function
+*  pointer is only located here, not called: it is written to `*out_init_fnptr`
+*  (when `out_init_fnptr` is non-null), which is reset to null on entry and
+*  stays null on every early error return.
+*
+*  Returns NVTX_SUCCESS if an init function was found and
+*  NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE if none was found.  If a path to a
+*  dynamic injection library is known but the library cannot be accessed,
+*  loaded, or lacks the entry point, the function returns immediately with
+*  NVTX_ERR_INIT_ACCESS_LIBRARY, NVTX_ERR_INIT_LOAD_LIBRARY or
+*  NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT without trying the remaining
+*  options; a library without the entry point is closed before returning.
+*
+*  This is implemented as one function instead of several small
+*  functions to minimize the number of weak symbols the linker must resolve.
+*  Order of search, as implemented below:
+*  - Loadable library exporting InitializeInjectionNvtxExtension
+*      - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
+*      - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
+*  - Pre-injected library exporting InitializeInjectionNvtxExtensionPreinject
+*    (only when NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY is enabled)
+*  - Statically-linked injection library defining InitializeInjectionNvtxExtension_fnptr
+*/
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(
+    NvtxExtInitializeInjectionFunc_t* out_init_fnptr);
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(
+    NvtxExtInitializeInjectionFunc_t* out_init_fnptr)
+{
+    static const char initFuncName[] = "InitializeInjectionNvtxExtension";
+#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+    static const char initFuncPreinjectName[] = "InitializeInjectionNvtxExtensionPreinject";
+#endif
+    NvtxExtInitializeInjectionFunc_t init_fnptr = NVTX_NULLPTR;
+    NVTX_DLLHANDLE injectionLibraryHandle = NVTX_DLLDEFAULT;
+    /* Make sure the caller never sees a stale pointer on an early return. */
+    if (out_init_fnptr)
+    {
+        *out_init_fnptr = NVTX_NULLPTR;
+    }
+#if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
+    /* Try discovering dynamic injection library to load */
+    {
+#if NVTX_SUPPORT_ENV_VARS
+        /* If env var NVTX_INJECTION64_PATH is set, it should contain the path
+           to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
+        const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
+            ? NVTX_STR("NVTX_INJECTION32_PATH")
+            : NVTX_STR("NVTX_INJECTION64_PATH");
+#endif /* NVTX_SUPPORT_ENV_VARS */
+        NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
+        const NVTX_PATHCHAR* injectionLibraryPath = NVTX_NULLPTR;
+        /* Refer to this variable explicitly in case all references to it are #if'ed out. */
+        (void)injectionLibraryPathBuf;
+#if NVTX_SUPPORT_ENV_VARS
+        /* Disable the warning for getenv & _wgetenv -- this usage is safe because
+           these functions are not called again before using the returned value. */
+#if defined(_MSC_VER)
+#pragma warning( push )
+#pragma warning( disable : 4996 )
+#endif
+        injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
+#if defined(_MSC_VER)
+#pragma warning( pop )
+#endif
+#endif
+#if NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
+        /* No path from the environment: derive the package name from
+           /proc/<pid>/cmdline and look for the library in the package's files directory. */
+        if (!injectionLibraryPath)
+        {
+            const char *bits = (sizeof(void*) == 4) ? "32" : "64";
+            char cmdlineBuf[32];
+            char pkgName[PATH_MAX];
+            int count;
+            int pid;
+            FILE *fp;
+            size_t bytesRead;
+            size_t pos;
+            pid = NVTX_STATIC_CAST(int, getpid());
+            count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
+            if (count <= 0 || count >= NVTX_STATIC_CAST(int, sizeof(cmdlineBuf)))
+            {
+                NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            fp = fopen(cmdlineBuf, "r");
+            if (!fp)
+            {
+                NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            /* Read at most sizeof(pkgName) - 1 bytes so the terminator written below fits. */
+            bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
+            fclose(fp);
+            if (bytesRead == 0)
+            {
+                NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            pkgName[bytesRead] = 0;
+            /* String can contain colon as a process separator. In this case the
+               package name is before the colon. */
+            pos = 0;
+            while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
+            {
+                ++pos;
+            }
+            pkgName[pos] = 0;
+            count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
+            if (count <= 0 || count >= NVTX_BUFSIZE)
+            {
+                NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            /* On Android, verify path is accessible due to aggressive file access restrictions. */
+            /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
+            /* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
+            if (injectionLibraryPathBuf[0] == '/')
+            {
+#if (__ANDROID_API__ < 21)
+                int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
+#else
+                int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
+#endif
+                if (access_err != 0)
+                {
+                    NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
+                    return NVTX_ERR_INIT_ACCESS_LIBRARY;
+                }
+            }
+            injectionLibraryPath = injectionLibraryPathBuf;
+        }
+#endif /* NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE */
+        /* At this point, `injectionLibraryPath` is specified if a dynamic
+           injection library was specified by a tool. */
+        if (injectionLibraryPath)
+        {
+            /* Load the injection library */
+            injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
+            if (!injectionLibraryHandle)
+            {
+                NVTX_ERR("Failed to load injection library\n");
+                return NVTX_ERR_INIT_LOAD_LIBRARY;
+            }
+            else
+            {
+                /* Attempt to get the injection library's entry-point. */
+                init_fnptr = NVTX_REINTERPRET_CAST(NvtxExtInitializeInjectionFunc_t, NVTX_DLLFUNC(injectionLibraryHandle, initFuncName));
+                if (!init_fnptr)
+                {
+                    /* Library is of no use without the entry point: close it again. */
+                    NVTX_DLLCLOSE(injectionLibraryHandle);
+                    NVTX_ERR("Failed to get address of function %s from injection library\n", initFuncName);
+                    return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
+                }
+            }
+        }
+    }
+#endif /* NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY */
+#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+    if (!init_fnptr)
+    {
+        /* Use POSIX global symbol chain to query for init function from any module */
+        init_fnptr = NVTX_REINTERPRET_CAST(NvtxExtInitializeInjectionFunc_t, NVTX_DLLFUNC(NVTX_DLLDEFAULT, initFuncPreinjectName));
+    }
+#endif
+#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
+    if (!init_fnptr)
+    {
+        /* Check weakly-defined function pointer.  A statically-linked injection can define this
+        *  as a normal symbol and set it to the address of the NVTX init function -- this will
+        *  provide a non-null value here.  If there is no other definition of this symbol, it
+        *  will be null here. */
+        if (InitializeInjectionNvtxExtension_fnptr)
+        {
+            init_fnptr = InitializeInjectionNvtxExtension_fnptr;
+        }
+    }
+#endif
+    /* Hand the result (possibly null) back to the caller. */
+    if (out_init_fnptr)
+    {
+        *out_init_fnptr = init_fnptr;
+    }
+    /* At this point, if `init_fnptr` is not set, no tool has specified an NVTX injection library.
+       Non-success result is returned, so that all NVTX API functions will be set to no-ops. */
+    if (!init_fnptr)
+    {
+        return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
+    }
+    return NVTX_SUCCESS;
+}
+/* Avoid warnings about missing prototypes. */
+/*
+ * Initializes one extension module.
+ *
+ * `moduleInfo` describes the module's function slot segments and is passed to
+ * the injection library's init function.  `moduleState` points to the module
+ * state word.  The caller that moves it from NVTX_EXTENSION_FRESH to
+ * NVTX_EXTENSION_STARTING performs the initialization and finally stores
+ * NVTX_EXTENSION_LOADED, NVTX_EXTENSION_DISABLED or
+ * NVTX_EXTENSION_INIT_FN_FAILED.  Every other caller waits until one of these
+ * three final states is visible.
+ */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce) (
+    nvtxExtModuleInfo_t* moduleInfo, intptr_t* moduleState);
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce) (
+    nvtxExtModuleInfo_t* moduleInfo, intptr_t* moduleState)
+{
+    intptr_t old;
+    NVTX_INFO( "%s\n", __FUNCTION__ );
+    /* Fast path: initialization already reached a final state (the message
+       below is logged for all three final states, not only for "loaded"). */
+    if (*moduleState == NVTX_EXTENSION_LOADED ||
+        *moduleState == NVTX_EXTENSION_DISABLED ||
+        *moduleState == NVTX_EXTENSION_INIT_FN_FAILED)
+    {
+        NVTX_INFO("Module loaded\n");
+        return;
+    }
+    /* Try to claim the initialization: FRESH -> STARTING. */
+    NVTX_ATOMIC_CAS_PTR(
+        old,
+        moduleState,
+        NVTX_EXTENSION_STARTING,
+        NVTX_EXTENSION_FRESH);
+    if (old == NVTX_EXTENSION_FRESH)
+    {
+        intptr_t stateReturnValue = NVTX_EXTENSION_LOADED;
+        NvtxExtInitializeInjectionFunc_t init_fnptr =
+            NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr;
+        int entryPointStatus = 0;
+        int forceAllToNoops = 0;
+        size_t s;
+        /* Load and initialize injection library, which will assign the function pointers. */
+        if (init_fnptr == NVTX_NULLPTR)
+        {
+            int result = 0;
+            /* Try to load vanilla NVTX first. */
+            nvtxInitialize(NVTX_NULLPTR);
+            result = NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(&init_fnptr);
+            /* At this point `init_fnptr` will be either 0 or a real function. */
+            if (result == NVTX_SUCCESS)
+            {
+                /* Remember the init function in the extension globals. */
+                NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr = init_fnptr;
+            }
+            else
+            {
+                /* A library that was loaded but lacks the extension entry point
+                   marks the module as disabled; other failures keep the
+                   default final state. */
+                if (result == NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT)
+                {
+                    stateReturnValue = NVTX_EXTENSION_DISABLED;
+                }
+                NVTX_ERR("Failed to load injection library.\n");
+            }
+        }
+        if (init_fnptr != NVTX_NULLPTR)
+        {
+            /* Invoke injection library's initialization function. A return
+               value of 0 means failure.
+               NOTE(review): nothing in this function unloads a dynamically
+               loaded injection library in that case -- the library handle is
+               local to nvtxExtLoadInjectionLibrary. */
+            entryPointStatus = init_fnptr(moduleInfo);
+            if (entryPointStatus == 0)
+            {
+                stateReturnValue = NVTX_EXTENSION_INIT_FN_FAILED;
+                NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
+            }
+        }
+        /* Clean up any functions that are still uninitialized so that they are
+           skipped. Set all to null if injection init function failed as well. */
+        forceAllToNoops = (init_fnptr == NVTX_NULLPTR) || (entryPointStatus == 0);
+        for (s = 0; s < moduleInfo->segmentsCount; ++s)
+        {
+            nvtxExtModuleSegment_t* segment = moduleInfo->segments + s;
+            size_t i;
+            for (i = 0; i < segment->slotCount; ++i)
+            {
+                if (forceAllToNoops || (segment->functionSlots[i] == NVTX_EXTENSION_FRESH))
+                {
+                    segment->functionSlots[i] = NVTX_EXTENSION_DISABLED;
+                }
+            }
+        }
+        /* Slots are written before the final state is stored below. */
+        NVTX_MEMBAR();
+        /* Signal that initialization has finished and the function pointers are set. */
+        NVTX_ATOMIC_WRITE_PTR(moduleState, stateReturnValue);
+    }
+    else /* Spin-wait until initialization has finished. */
+    {
+        NVTX_MEMBAR();
+        while (*moduleState != NVTX_EXTENSION_LOADED &&
+               *moduleState != NVTX_EXTENSION_DISABLED &&
+               *moduleState != NVTX_EXTENSION_INIT_FN_FAILED)
+        {
+            NVTX_YIELD();
+            NVTX_MEMBAR();
+        }
+    }
+}
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadHelperInternal.h ADDED Viewed

	@@ -0,0 +1,294 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifndef NVTX_EXT_PAYLOAD_HELPER_INTERNAL_H
+#define NVTX_EXT_PAYLOAD_HELPER_INTERNAL_H
+/* General helper macros */
+#include "nvtxExtHelperMacros.h"
+/* Get variable name with line number (almost unique per file):
+   `nvtxDFDB` concatenated with the current `__LINE__`. */
+#define _NVTX_PAYLOAD_DATA_VAR NVTX_EXT_CONCAT(nvtxDFDB,__LINE__)
+/* Create real arguments from just pasting tokens next to each other. */
+#define _NVTX_PAYLOAD_PASS_THROUGH(...) __VA_ARGS__
+/* Avoid prefixing `NVTX_PAYLOAD_ENTRY_` for nested payloads: the schema entry
+   macros paste `NVTX_PAYLOAD_ENTRY_` onto the entry type, so a type written as
+   `THROWAWAY id` becomes `NVTX_PAYLOAD_ENTRY_THROWAWAY id`, and the empty
+   macro below leaves just `id`. */
+#define NVTX_PAYLOAD_ENTRY_THROWAWAY
+#define _NVTX_PAYLOAD_NESTED(id) THROWAWAY id
+/*
+ * Create the NVTX binary payloads schema attributes.
+ *
+ * Defines a variable `<struct_id>Attr` of type `nvtxPayloadSchemaAttr_t` that
+ * refers to the entry array `<struct_id>Schema`, which has to be defined
+ * before this macro is used.
+ *
+ * @param struct_id The name of the struct.
+ * @param schema_name The name of the schema.
+ * @param schema_flags Additional schema flags
+ * @param schema_id The value stored as schema ID.
+ * @param mask_add Fields to be added to the mask. A non-empty value must end
+ *                 with a trailing `|`, because it is pasted directly in
+ *                 front of the next field mask flag.
+ * @param num_entries The number of schema entries.
+ */
+#define NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, schema_id, mask_add, num_entries) \
+    nvtxPayloadSchemaAttr_t struct_id##Attr = { \
+        /*.fieldMask = */NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE | mask_add \
+            NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES | \
+            NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES | \
+            NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_STATIC_SIZE, \
+        /*.name = */schema_name, \
+        /*.type = */NVTX_PAYLOAD_SCHEMA_TYPE_STATIC, \
+        /*.flags = */schema_flags, \
+        /*.entries = */struct_id##Schema, /*.numEntries = */num_entries, \
+        /*.payloadStaticSize = */sizeof(struct_id), \
+        /*.packAlign = */0, /*.schemaId = */schema_id};
+/*****************************************************************/
+/*** Helper for `NVTX_DEFINE_SCHEMA_FOR_STRUCT[_AND_REGISTER]` ***/
+/* First part of schema entry for different number of arguments.
+   `_NVTX_PAYLOAD_SCHEMA_EF<n>` handles an entry with `n` arguments and emits
+   the leading initializers in this order: flags, entry type, name,
+   description, array length. Arguments that are not given default to
+   `0` / `NVTX_NULLPTR`. */
+#define _NVTX_PAYLOAD_SCHEMA_EF2(member, etype) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, NVTX_NULLPTR, NVTX_NULLPTR, 0,
+#define _NVTX_PAYLOAD_SCHEMA_EF3(member, etype, name) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, name, NVTX_NULLPTR, 0,
+#define _NVTX_PAYLOAD_SCHEMA_EF4(member, etype, name, desc) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, 0,
+#define _NVTX_PAYLOAD_SCHEMA_EF5(member, etype, name, desc, arraylen) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
+#define _NVTX_PAYLOAD_SCHEMA_EF6(member, etype, name, desc, arraylen, flags) \
+    NVTX_PAYLOAD_ENTRY_FLAG_##flags, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
+/* Dispatch to the `_NVTX_PAYLOAD_SCHEMA_EF<n>` macro matching the argument count. */
+#define _NVTX_PAYLOAD_SCHEMA_ENTRY_FRONT(...) \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_SCHEMA_EF, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+/* Second part of schema entry (append struct member).
+   (At least two arguments are passed (`member` and `etype`). */
+#define _NVTX_PAYLOAD_SCHEMA_ENTRY_END(member, ...) member
+/* Resolve to schema entry. `entry` is the parenthesized list
+   `(member, etype[, name[, desc[, arraylen[, flags]]]])`; the member's offset
+   within `struct_id` is appended as the last initializer. */
+#define _NVTX_PAYLOAD_SCHEMA_ENTRY(struct_id, entry) \
+    {_NVTX_PAYLOAD_SCHEMA_ENTRY_FRONT entry \
+    offsetof(struct_id, _NVTX_PAYLOAD_SCHEMA_ENTRY_END entry)},
+/* Handle up to 16 schema entries. `_NVTX_PAYLOAD_SME<n>` emits the schema
+   entry for its first entry argument and hands the remaining arguments to
+   `_NVTX_PAYLOAD_SME<n-1>`. */
+#define _NVTX_PAYLOAD_SME1(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1)
+#define _NVTX_PAYLOAD_SME2(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME1(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME3(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME2(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME4(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME3(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME5(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME4(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME6(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME5(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME7(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME6(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME8(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME7(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME9(s,e1,...)  _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME8(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME10(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME9(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME11(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME10(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME12(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME11(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME13(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME12(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME14(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME13(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME15(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME14(s,__VA_ARGS__)
+#define _NVTX_PAYLOAD_SME16(s,e1,...) _NVTX_PAYLOAD_SCHEMA_ENTRY(s,e1) _NVTX_PAYLOAD_SME15(s,__VA_ARGS__)
+/* Define the entry array `<struct_id>Schema`: one element per entry argument,
+   followed by a final `{0, 0}` element. */
+#define _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, ...) \
+  nvtxPayloadSchemaEntry_t struct_id##Schema[] = { \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_SME, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(struct_id, __VA_ARGS__) \
+    {0, 0} \
+  };
+/*
+ * Handle optional parameters for `NVTX_DEFINE_SCHEMA_FOR_STRUCT[_AND_REGISTER]`.
+ *
+ * `_NVTX_DEFINE_SCHEMA_FOR_STRUCT` dispatches to `_NVTX_DEFINE_S4S_<n>`, where
+ * `<n>` is the total number of arguments including `struct_id` (2 to 6).
+ * Every variant first defines the schema entry array for `struct_id` and then
+ * invokes `NVTX_PAYLOAD_SCHEMA_ATTR`; parameters a variant does not take are
+ * replaced by `NVTX_NULLPTR`, `NVTX_PAYLOAD_SCHEMA_FLAG_NONE` or 0.
+ * `entries` is a parenthesized list that `_NVTX_PAYLOAD_PASS_THROUGH` unwraps.
+ * The field-mask argument is either empty or ends in a dangling `|`;
+ * presumably `NVTX_PAYLOAD_SCHEMA_ATTR` (defined outside this section) appends
+ * further mask bits after it -- confirm before changing it.
+ */
+#define _NVTX_DEFINE_S4S_6(struct_id, schema_name, prefix, schema_flags, schema_id, entries) \
+    prefix _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+    prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, schema_id, \
+        NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_SCHEMA_ID |,\
+        NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_S4S_5(struct_id, schema_name, prefix, schema_flags, entries) \
+    prefix _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+    prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, 0, \
+        NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS |, \
+        NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_S4S_4(struct_id, schema_name, prefix, entries) \
+    prefix _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+    prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, \
+        NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME |, \
+        NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_S4S_3(struct_id, schema_name, entries) \
+    _NVTX_DEFINE_S4S_4(struct_id, schema_name, /*prefix*/, entries)
+#define _NVTX_DEFINE_S4S_2(struct_id, entries) \
+    _NVTX_PAYLOAD_SCHEMA_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+    NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, NVTX_NULLPTR, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, ,\
+        NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_SCHEMA_FOR_STRUCT(struct_id, ...) \
+    NVTX_EXT_CONCAT(_NVTX_DEFINE_S4S_, \
+        NVTX_EXT_NUM_ARGS(struct_id, __VA_ARGS__))(struct_id, __VA_ARGS__)
+/*** END: Helper for `NVTX_PAYLOAD_STATIC_SCHEMA_{DEFINE,SETUP}` ***/
+/******************************************************************/
+/*** Helper for `NVTX_DEFINE_STRUCT_WITH_SCHEMA[_AND_REGISTER]` ***/
+/* Extract struct member for fixed-size arrays.
+   `(name)` resolves to `name`, `(name, count)` to the declarator `name[count]`. */
+#define _NVTX_PAYLOAD_STRUCT_ARR_MEM1(name) name
+#define _NVTX_PAYLOAD_STRUCT_ARR_MEM2(name, count) name[count]
+/* Extract type and member name and handle special case of fixed-size array.
+   `_NVTX_PAYLOAD_STRUCT_E<n>` handles an entry with `<n>` arguments and emits
+   the member declaration `type member;`; all other arguments are ignored here.
+   With 6 or 7 arguments, `member` has to be parenthesized -- `(name)` or
+   `(name, count)` -- because it is applied to `NVTX_EXT_NUM_ARGS` and to the
+   selected `_NVTX_PAYLOAD_STRUCT_ARR_MEM<k>` macro as an argument list. */
+#define _NVTX_PAYLOAD_STRUCT_E2(type, member) type member;
+#define _NVTX_PAYLOAD_STRUCT_E3(type, member, etype) type member;
+#define _NVTX_PAYLOAD_STRUCT_E4(type, member, etype, name) type member;
+#define _NVTX_PAYLOAD_STRUCT_E5(type, member, etype, name, desc) type member;
+#define _NVTX_PAYLOAD_STRUCT_E6(type, member, etype, name, desc, arraylen) \
+    type NVTX_EXT_CONCAT(_NVTX_PAYLOAD_STRUCT_ARR_MEM, NVTX_EXT_NUM_ARGS member) member;
+#define _NVTX_PAYLOAD_STRUCT_E7(type, member, etype, name, desc, arraylen, flags) \
+    _NVTX_PAYLOAD_STRUCT_E6(type, member, etype, name, desc, arraylen)
+/* Handle different number of arguments per struct entry: dispatch to
+   `_NVTX_PAYLOAD_STRUCT_E<n>` based on the number of arguments. */
+#define _NVTX_PAYLOAD_STRUCT_ENTRY_(...) \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_STRUCT_E, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+/* Handle up to 16 struct members.
+   `_NVTX_PAYLOAD_STRUCT_ENTRY` applies the dispatcher to a parenthesized
+   entry. `_NVTX_PAYLOAD_STRUCT<n>` emits the member for `e1` and chains to
+   `_NVTX_PAYLOAD_STRUCT<n-1>` with the remaining entries. */
+#define _NVTX_PAYLOAD_STRUCT_ENTRY(entry) _NVTX_PAYLOAD_STRUCT_ENTRY_ entry
+#define _NVTX_PAYLOAD_STRUCT1(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1)
+#define _NVTX_PAYLOAD_STRUCT2(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT1(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT3(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT2(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT4(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT3(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT5(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT4(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT6(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT5(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT7(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT6(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT8(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT7(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT9(e1, ...)  _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT8(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT10(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT9(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT11(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT10(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT12(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT11(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT13(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT12(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT14(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT13(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT15(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT14(__VA_ARGS__)
+#define _NVTX_PAYLOAD_STRUCT16(e1, ...) _NVTX_PAYLOAD_STRUCT_ENTRY(e1) _NVTX_PAYLOAD_STRUCT15(__VA_ARGS__)
+/* Generate the typedef: an anonymous struct named `struct_id` whose members
+   are produced, in order, from the given entries. */
+#define _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, ...) \
+  typedef struct { \
+      NVTX_EXT_CONCAT(_NVTX_PAYLOAD_STRUCT, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__) \
+  } struct_id;
+/* Generate first part of the schema entry.
+   `_NVTX_PAYLOAD_INIT_SCHEMA_N<n>` handles an entry with `<n>` arguments and
+   expands to five comma-terminated initializer values: entry flags,
+   `NVTX_PAYLOAD_ENTRY_<etype>`, name, description and array length. Values the
+   entry does not provide are filled with 0 or `NVTX_NULLPTR`; only the
+   7-argument form sets flags (`NVTX_PAYLOAD_ENTRY_FLAG_<flags>`). */
+#define _NVTX_PAYLOAD_INIT_SCHEMA_N3(type, memberId, etype) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, NVTX_NULLPTR, NVTX_NULLPTR, 0,
+#define _NVTX_PAYLOAD_INIT_SCHEMA_N4(type, memberId, etype, name) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, name, NVTX_NULLPTR, 0,
+#define _NVTX_PAYLOAD_INIT_SCHEMA_N5(type, memberId, etype, name, desc) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, 0,
+#define _NVTX_PAYLOAD_INIT_SCHEMA_N6(type, memberId, etype, name, desc, arraylen) \
+    0, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
+#define _NVTX_PAYLOAD_INIT_SCHEMA_N7(type, memberId, etype, name, desc, arraylen, flags) \
+    NVTX_PAYLOAD_ENTRY_FLAG_##flags, NVTX_PAYLOAD_ENTRY_##etype, name, desc, arraylen,
+#define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_FRONT(...) \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_INIT_SCHEMA_N, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+#define _NVTX_PAYLOAD_ARRAY_MEMBER1(name) name
+#define _NVTX_PAYLOAD_ARRAY_MEMBER2(name, count) name
+/* Resolve to last part of schema entry (append struct member).
+   For 3 to 5 arguments `memberId` is used as is. For 6 or 7 arguments it is a
+   parenthesized `(name)` or `(name, count)` and only `name` is kept, so the
+   result can be used as the member designator of `offsetof`. */
+#define _NVTX_PAYLOAD_INIT_SCHEMA_NX3(type, memberId, ...) memberId
+#define _NVTX_PAYLOAD_INIT_SCHEMA_NX4(type, memberId, ...) memberId
+#define _NVTX_PAYLOAD_INIT_SCHEMA_NX5(type, memberId, ...) memberId
+#define _NVTX_PAYLOAD_INIT_SCHEMA_NX6(type, memberId, ...) \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_ARRAY_MEMBER, NVTX_EXT_NUM_ARGS memberId) memberId
+#define _NVTX_PAYLOAD_INIT_SCHEMA_NX7(type, memberId, ...) \
+    _NVTX_PAYLOAD_INIT_SCHEMA_NX6(type, memberId, __VA_ARGS__)
+#define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_END(...) \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_INIT_SCHEMA_NX, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+/* Resolve to schema entry, including the trailing comma.
+   `entry` is `(type, memberId, etype, ...)`. */
+#define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(struct_id, entry) \
+    {_NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_FRONT entry \
+    offsetof(struct_id, _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY_END entry)},
+/* Handle up to 16 schema entries.
+   `_NVTX_PAYLOAD_INIT_SME<n>` emits the entry for `e1` and chains to
+   `_NVTX_PAYLOAD_INIT_SME<n-1>` with the remaining entries.
+   `_NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES` picks the chain head from the argument
+   count and defines the array `<struct_id>Schema`, closed by a `{0, 0}`
+   entry. */
+#define _NVTX_PAYLOAD_INIT_SME1(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1)
+#define _NVTX_PAYLOAD_INIT_SME2(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME1(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME3(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME2(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME4(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME3(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME5(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME4(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME6(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME5(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME7(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME6(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME8(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME7(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME9(s, e1, ...)  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME8(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME10(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME9(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME11(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME10(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME12(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME11(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME13(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME12(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME14(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME13(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME15(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME14(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_INIT_SME16(s, e1, ...) _NVTX_PAYLOAD_SCHEMA_INIT_ENTRY(s, e1) _NVTX_PAYLOAD_INIT_SME15(s, __VA_ARGS__)
+#define _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, ...) \
+  nvtxPayloadSchemaEntry_t struct_id##Schema[] = { \
+    NVTX_EXT_CONCAT(_NVTX_PAYLOAD_INIT_SME, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(struct_id, __VA_ARGS__) \
+    {0, 0} \
+  };
+/*
+ * Handle optional parameters for `NVTX_DEFINE_STRUCT_WITH_SCHEMA[_AND_REGISTER]`.
+ *
+ * `_NVTX_DEFINE_STRUCT_WITH_SCHEMA` dispatches to `_NVTX_DEFINE_SWS_<n>`, where
+ * `<n>` is the total number of arguments including `struct_id` (2 to 6).
+ * Every variant emits, in this order: the struct typedef, the schema entry
+ * array and the `NVTX_PAYLOAD_SCHEMA_ATTR` invocation. `prefix` is placed in
+ * front of the latter two only, never in front of the typedef.
+ * `entries` is a parenthesized list that `_NVTX_PAYLOAD_PASS_THROUGH` unwraps.
+ * The field-mask argument is either empty or ends in a dangling `|`;
+ * presumably `NVTX_PAYLOAD_SCHEMA_ATTR` (defined outside this section) appends
+ * further mask bits after it -- confirm before changing it.
+ */
+#define _NVTX_DEFINE_SWS_6(struct_id, schema_name, prefix, schema_flags, schema_id, entries) \
+  _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  prefix _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, schema_id, \
+      NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | \
+      NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_SCHEMA_ID |, \
+      NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_SWS_5(struct_id, schema_name, prefix, schema_flags, entries) \
+  _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  prefix _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, schema_flags, 0, \
+      NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS |, \
+      NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_SWS_4(struct_id, schema_name, prefix, entries) \
+  _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  prefix _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  prefix NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, schema_name, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, \
+      NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME |, \
+      NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_SWS_3(struct_id, schema_name, entries) \
+  _NVTX_DEFINE_SWS_4(struct_id, schema_name, /* no prefix */, entries)
+#define _NVTX_DEFINE_SWS_2(struct_id, entries) \
+  _NVTX_PAYLOAD_TYPEDEF_STRUCT(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  _NVTX_PAYLOAD_SCHEMA_INIT_ENTRIES(struct_id, _NVTX_PAYLOAD_PASS_THROUGH entries) \
+  NVTX_PAYLOAD_SCHEMA_ATTR(struct_id, NVTX_NULLPTR, NVTX_PAYLOAD_SCHEMA_FLAG_NONE, 0, , \
+      NVTX_EXT_NUM_ARGS(_NVTX_PAYLOAD_PASS_THROUGH entries))
+#define _NVTX_DEFINE_STRUCT_WITH_SCHEMA(struct_id, ...) \
+    NVTX_EXT_CONCAT(_NVTX_DEFINE_SWS_, \
+        NVTX_EXT_NUM_ARGS(struct_id, __VA_ARGS__))(struct_id, __VA_ARGS__)
+/*** END: Helper for `NVTX_DEFINE_STRUCT_WITH_SCHEMA[_AND_REGISTER]` ***/
+#endif /* NVTX_EXT_PAYLOAD_HELPER_INTERNAL_H */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtPayloadTypeInfo.h ADDED Viewed

	@@ -0,0 +1,189 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_IMPL_PAYLOAD_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExtPayload.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+typedef void* nvtx_payload_pointer_type; /* Pointer type whose size and alignment are reported for the ADDRESS entry type. */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+#ifndef __APPLE__
+#include <uchar.h>
+#endif
+#include <stdalign.h>
+#endif
+/* `char8_t` is available as of C++20 or C23.
+   It is treated as unavailable when `__APPLE__` is defined.
+   NOTE(review): the C++ threshold used is 201811L, which is below the
+   202002L value of C++20 -- presumably to admit pre-standard compiler modes;
+   confirm. */
+#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || (defined(__cplusplus) && __cplusplus >= 201811L)) && !defined(__APPLE__)
+#define NVTX_HAVE_CHAR8 1
+#else
+#define NVTX_HAVE_CHAR8 0
+#endif
+/* `char16_t` and `char32_t` are available as of C++11 or C11.
+   They are treated as unavailable when `__APPLE__` is defined, matching the
+   `<uchar.h>` include above, which is skipped in that case.
+   NOTE(review): the C++ threshold used is 200704L, which is below the
+   201103L value of C++11 -- confirm this is intentional. */
+#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 200704L)) && !defined(__APPLE__)
+#define NVTX_HAVE_CHAR16_CHAR32 1
+#else
+#define NVTX_HAVE_CHAR16_CHAR32 0
+#endif
+/* `alignof` is available as of C11 or C++11.
+   In that case both helper macros map directly to `alignof(type)`;
+   `nvtx_alignof2` ignores its second argument. */
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || (defined(__cplusplus) && __cplusplus >= 201103L)
+#define nvtx_alignof(type) alignof(type)
+#define nvtx_alignof2(type,tname) alignof(type)
+#else /* (__STDC_VERSION__ >= 201112L) || (__cplusplus >= 201103L) */
+/* Create helper structs to determine type alignment.
+   Each `_nvtx_<name>` struct places a member `d` of the type behind a single
+   `char`, so `offsetof(_nvtx_<name>, d)` is used as the alignment of the type.
+   `MKTYPEDEF2` takes a separate one-word `tname` for types that are spelled
+   with more than one token and therefore cannot be pasted into an identifier. */
+#define MKTYPEDEF(type) typedef struct {char c; type d;} _nvtx_##type
+#define MKTYPEDEF2(type,tname) typedef struct {char c; type d;} _nvtx_##tname
+MKTYPEDEF(char);
+MKTYPEDEF2(unsigned char, uchar);
+MKTYPEDEF(short);
+MKTYPEDEF2(unsigned short, ushort);
+MKTYPEDEF(int);
+MKTYPEDEF2(unsigned int, uint);
+MKTYPEDEF(long);
+MKTYPEDEF2(unsigned long, ulong);
+MKTYPEDEF2(long long, longlong);
+MKTYPEDEF2(unsigned long long, ulonglong);
+MKTYPEDEF(int8_t);
+MKTYPEDEF(uint8_t);
+MKTYPEDEF(int16_t);
+MKTYPEDEF(uint16_t);
+MKTYPEDEF(int32_t);
+MKTYPEDEF(uint32_t);
+MKTYPEDEF(int64_t);
+MKTYPEDEF(uint64_t);
+MKTYPEDEF(float);
+MKTYPEDEF(double);
+MKTYPEDEF2(long double, longdouble);
+MKTYPEDEF(size_t);
+MKTYPEDEF(nvtx_payload_pointer_type);
+MKTYPEDEF(wchar_t);
+#if NVTX_HAVE_CHAR8
+    MKTYPEDEF(char8_t);
+#endif
+#if NVTX_HAVE_CHAR16_CHAR32
+    MKTYPEDEF(char16_t);
+    MKTYPEDEF(char32_t);
+#endif
+/* C requires including stddef.h in order to use `offsetof`. */
+#ifndef __cplusplus
+#include <stddef.h>
+#endif
+#define nvtx_alignof(tname) offsetof(_nvtx_##tname, d)
+#define nvtx_alignof2(type, tname) offsetof(_nvtx_##tname, d)
+#endif /*  __STDC_VERSION__ >= 201112L */
+#undef MKTYPEDEF
+#undef MKTYPEDEF2
+/*
+ * Helper array to get the alignment for each predefined C/C++ language type.
+ * The order of entries must match the values in `enum nvtxPayloadSchemaEntryType`.
+ * Each entry is a `{size, alignment}` pair; character types that are not
+ * available in the current language mode get a `{0, 0}` placeholder so that
+ * the positions of the entries stay fixed.
+ *
+ * In C++, `const` variables use internal linkage by default, but we need it to
+ * be public (extern) since weak declarations must be public.
+ */
+NVTX_LINKONCE_DEFINE_GLOBAL
+#ifdef __cplusplus
+extern
+#endif
+const nvtxPayloadEntryTypeInfo_t
+NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadTypeInfo)[NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE] =
+{
+    /* The first entry contains this array's length and the size of each entry in this array. */
+    {NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE, sizeof(nvtxPayloadEntryTypeInfo_t)},
+    /*** C integer types ***/
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR */   {sizeof(char), nvtx_alignof(char)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_UCHAR */  {sizeof(unsigned char), nvtx_alignof2(unsigned char, uchar)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_SHORT */  {sizeof(short), nvtx_alignof(short)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_USHORT */ {sizeof(unsigned short), nvtx_alignof2(unsigned short, ushort)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_INT */    {sizeof(int), nvtx_alignof(int)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_UINT */   {sizeof(unsigned int), nvtx_alignof2(unsigned int, uint)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_LONG */   {sizeof(long), nvtx_alignof(long)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_ULONG */  {sizeof(unsigned long), nvtx_alignof2(unsigned long, ulong)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG */  {sizeof(long long), nvtx_alignof2(long long, longlong)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG */ {sizeof(unsigned long long), nvtx_alignof2(unsigned long long,ulonglong)},
+    /*** Integer types with explicit size ***/
+    /* NVTX_PAYLOAD_ENTRY_TYPE_INT8 */   {sizeof(int8_t),   nvtx_alignof(int8_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_UINT8 */  {sizeof(uint8_t),  nvtx_alignof(uint8_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_INT16 */  {sizeof(int16_t),  nvtx_alignof(int16_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_UINT16 */ {sizeof(uint16_t), nvtx_alignof(uint16_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_INT32 */  {sizeof(int32_t),  nvtx_alignof(int32_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_UINT32 */ {sizeof(uint32_t), nvtx_alignof(uint32_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_INT64 */  {sizeof(int64_t),  nvtx_alignof(int64_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_UINT64 */ {sizeof(uint64_t), nvtx_alignof(uint64_t)},
+    /*** C floating point types ***/
+    /* NVTX_PAYLOAD_ENTRY_TYPE_FLOAT */      {sizeof(float),       nvtx_alignof(float)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE */     {sizeof(double),      nvtx_alignof(double)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE */ {sizeof(long double), nvtx_alignof2(long double, longdouble)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_SIZE */    {sizeof(size_t),       nvtx_alignof(size_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS */ {sizeof(nvtx_payload_pointer_type), nvtx_alignof(nvtx_payload_pointer_type)},
+    /*** Special character types ***/
+    /* NVTX_PAYLOAD_ENTRY_TYPE_WCHAR */ {sizeof(wchar_t), nvtx_alignof(wchar_t)},
+#if NVTX_HAVE_CHAR8
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 */ {sizeof(char8_t), nvtx_alignof(char8_t)},
+#else
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 */ {0, 0},
+#endif
+#if NVTX_HAVE_CHAR16_CHAR32
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 */ {sizeof(char16_t), nvtx_alignof(char16_t)},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 */ {sizeof(char32_t), nvtx_alignof(char32_t)}
+#else
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 */ {0, 0},
+    /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 */ {0, 0}
+#endif
+};
+#undef nvtx_alignof
+#undef nvtx_alignof2
+#undef NVTX_HAVE_CHAR8
+#undef NVTX_HAVE_CHAR16_CHAR32

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxExtTypes.h ADDED Viewed

	@@ -0,0 +1,66 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_EXT_TYPES_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt[EXTENSION].h.
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+/* This header defines types which are used by the internal implementation
+*  of NVTX and callback subscribers.  API clients do not use these types,
+*  so they are defined here instead of in nvToolsExt.h to clarify they are
+*  not part of the NVTX client API.
+*
+*  Contents: the `NvtxExtGetExportFunction_t` callback type (maps a 32-bit
+*  export function id to an `intptr_t`), the `nvtxExtModuleSegment_t` and
+*  `nvtxExtModuleInfo_t` descriptors, and the
+*  `NvtxExtInitializeInjectionFunc_t` callback type, which receives a pointer
+*  to an `nvtxExtModuleInfo_t` and returns an `int`. */
+#ifndef NVTXEXTTYPES_H
+#define NVTXEXTTYPES_H
+typedef intptr_t (NVTX_API * NvtxExtGetExportFunction_t)(uint32_t exportFunctionId);
+typedef struct nvtxExtModuleSegment_t
+{
+    size_t segmentId;
+    size_t slotCount; /* presumably the number of entries in `functionSlots` -- confirm */
+    intptr_t* functionSlots;
+} nvtxExtModuleSegment_t;
+typedef struct nvtxExtModuleInfo_t
+{
+    uint16_t nvtxVer;
+    uint16_t structSize; /* presumably sizeof(nvtxExtModuleInfo_t) as seen by the producer -- confirm */
+    uint16_t moduleId;
+    uint16_t compatId;
+    size_t segmentsCount; /* presumably the number of entries in `segments` -- confirm */
+    nvtxExtModuleSegment_t* segments;
+    NvtxExtGetExportFunction_t getExportFunction;
+    const void* extInfo;
+} nvtxExtModuleInfo_t;
+typedef int (NVTX_API * NvtxExtInitializeInjectionFunc_t)(nvtxExtModuleInfo_t* moduleInfo);
+#endif /* NVTXEXTTYPES_H */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImpl.h ADDED Viewed

	@@ -0,0 +1,464 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+/* ---- Include required platform headers ---- */
+#if defined(_WIN32)
+#include <windows.h>
+#else
+#include <unistd.h>
+#if defined(__ANDROID__)
+#include <android/api-level.h>
+#endif
+#if defined(__linux__) || defined(__CYGWIN__)
+#include <sched.h>
+#endif
+#include <sys/types.h>
+#include <limits.h>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <pthread.h>
+#endif
+/* ---- Define macros used in this file ---- */
+/* Values for the `initState` member of the globals; the globals start out as
+*  NVTX_INIT_STATE_FRESH. */
+#define NVTX_INIT_STATE_FRESH 0
+#define NVTX_INIT_STATE_STARTED 1
+#define NVTX_INIT_STATE_COMPLETE 2
+/* Debug logging helpers.  With NVTX_DEBUG_PRINT defined, NVTX_ERR/NVTX_INFO
+*  forward their printf-style arguments to the Android log (tag "NVTOOLSEXT")
+*  or to stderr behind an "NVTX_ERROR: " / "NVTX_INFO: " prefix; without it
+*  they expand to nothing.
+*  No variant carries a trailing semicolon: the call site supplies it, so all
+*  variants behave the same, including inside an unbraced if/else. */
+#ifdef NVTX_DEBUG_PRINT
+#ifdef __ANDROID__
+#include <android/log.h>
+#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__)
+#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__)
+#else
+#include <stdio.h>
+/* The prefix is joined by string literal concatenation, so the first argument
+*  has to be a string literal. */
+#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
+#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
+#endif
+#else /* !defined(NVTX_DEBUG_PRINT) */
+#define NVTX_ERR(...)
+#define NVTX_INFO(...)
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+#ifdef __GNUC__
+#pragma GCC visibility push(hidden)
+#endif
+/* ---- Forward declare all functions referenced in globals ----
+*  These prototypes have to precede the definition of the globals, whose
+*  initializer stores the addresses of the `nvtxEti*` functions. */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
+    NvtxCallbackModule callback_module,
+    NvtxFunctionTable* out_table,
+    unsigned int* out_size);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(
+    uint32_t version);
+NVTX_LINKONCE_FWDDECL_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(
+    uint32_t exportTableId);
+#include "nvtxInitDecls.h"
+/* ---- Define all globals ----
+*  `nvtxGlobals_t` bundles the state of this NVTX implementation:
+*    - `initState`: one of the NVTX_INIT_STATE_* values,
+*    - two export tables (`etblCallbacks`, `etblVersionInfo`),
+*    - one `<function>_impl_fnptr` member per NVTX API function,
+*    - one table of pointers per callback module.
+*  The order of the members must match the order of the values in the
+*  initializer of the global instance.
+*  `NVTX_GLOBAL_TABLE_ENTRY(name)` yields the address of the
+*  `name##_impl_fnptr` member of the global instance, cast to
+*  `NvtxFunctionPointer*`. */
+typedef struct nvtxGlobals_t
+{
+    volatile unsigned int initState;
+    NvtxExportTableCallbacks etblCallbacks;
+    NvtxExportTableVersionInfo etblVersionInfo;
+    /* Implementation function pointers */
+    nvtxMarkEx_impl_fntype nvtxMarkEx_impl_fnptr;
+    nvtxMarkA_impl_fntype nvtxMarkA_impl_fnptr;
+    nvtxMarkW_impl_fntype nvtxMarkW_impl_fnptr;
+    nvtxRangeStartEx_impl_fntype nvtxRangeStartEx_impl_fnptr;
+    nvtxRangeStartA_impl_fntype nvtxRangeStartA_impl_fnptr;
+    nvtxRangeStartW_impl_fntype nvtxRangeStartW_impl_fnptr;
+    nvtxRangeEnd_impl_fntype nvtxRangeEnd_impl_fnptr;
+    nvtxRangePushEx_impl_fntype nvtxRangePushEx_impl_fnptr;
+    nvtxRangePushA_impl_fntype nvtxRangePushA_impl_fnptr;
+    nvtxRangePushW_impl_fntype nvtxRangePushW_impl_fnptr;
+    nvtxRangePop_impl_fntype nvtxRangePop_impl_fnptr;
+    nvtxNameCategoryA_impl_fntype nvtxNameCategoryA_impl_fnptr;
+    nvtxNameCategoryW_impl_fntype nvtxNameCategoryW_impl_fnptr;
+    nvtxNameOsThreadA_impl_fntype nvtxNameOsThreadA_impl_fnptr;
+    nvtxNameOsThreadW_impl_fntype nvtxNameOsThreadW_impl_fnptr;
+    nvtxNameCuDeviceA_fakeimpl_fntype nvtxNameCuDeviceA_impl_fnptr;
+    nvtxNameCuDeviceW_fakeimpl_fntype nvtxNameCuDeviceW_impl_fnptr;
+    nvtxNameCuContextA_fakeimpl_fntype nvtxNameCuContextA_impl_fnptr;
+    nvtxNameCuContextW_fakeimpl_fntype nvtxNameCuContextW_impl_fnptr;
+    nvtxNameCuStreamA_fakeimpl_fntype nvtxNameCuStreamA_impl_fnptr;
+    nvtxNameCuStreamW_fakeimpl_fntype nvtxNameCuStreamW_impl_fnptr;
+    nvtxNameCuEventA_fakeimpl_fntype nvtxNameCuEventA_impl_fnptr;
+    nvtxNameCuEventW_fakeimpl_fntype nvtxNameCuEventW_impl_fnptr;
+    nvtxNameClDeviceA_fakeimpl_fntype nvtxNameClDeviceA_impl_fnptr;
+    nvtxNameClDeviceW_fakeimpl_fntype nvtxNameClDeviceW_impl_fnptr;
+    nvtxNameClContextA_fakeimpl_fntype nvtxNameClContextA_impl_fnptr;
+    nvtxNameClContextW_fakeimpl_fntype nvtxNameClContextW_impl_fnptr;
+    nvtxNameClCommandQueueA_fakeimpl_fntype nvtxNameClCommandQueueA_impl_fnptr;
+    nvtxNameClCommandQueueW_fakeimpl_fntype nvtxNameClCommandQueueW_impl_fnptr;
+    nvtxNameClMemObjectA_fakeimpl_fntype nvtxNameClMemObjectA_impl_fnptr;
+    nvtxNameClMemObjectW_fakeimpl_fntype nvtxNameClMemObjectW_impl_fnptr;
+    nvtxNameClSamplerA_fakeimpl_fntype nvtxNameClSamplerA_impl_fnptr;
+    nvtxNameClSamplerW_fakeimpl_fntype nvtxNameClSamplerW_impl_fnptr;
+    nvtxNameClProgramA_fakeimpl_fntype nvtxNameClProgramA_impl_fnptr;
+    nvtxNameClProgramW_fakeimpl_fntype nvtxNameClProgramW_impl_fnptr;
+    nvtxNameClEventA_fakeimpl_fntype nvtxNameClEventA_impl_fnptr;
+    nvtxNameClEventW_fakeimpl_fntype nvtxNameClEventW_impl_fnptr;
+    nvtxNameCudaDeviceA_fakeimpl_fntype nvtxNameCudaDeviceA_impl_fnptr;
+    nvtxNameCudaDeviceW_fakeimpl_fntype nvtxNameCudaDeviceW_impl_fnptr;
+    nvtxNameCudaStreamA_fakeimpl_fntype nvtxNameCudaStreamA_impl_fnptr;
+    nvtxNameCudaStreamW_fakeimpl_fntype nvtxNameCudaStreamW_impl_fnptr;
+    nvtxNameCudaEventA_fakeimpl_fntype nvtxNameCudaEventA_impl_fnptr;
+    nvtxNameCudaEventW_fakeimpl_fntype nvtxNameCudaEventW_impl_fnptr;
+    nvtxDomainMarkEx_impl_fntype nvtxDomainMarkEx_impl_fnptr;
+    nvtxDomainRangeStartEx_impl_fntype nvtxDomainRangeStartEx_impl_fnptr;
+    nvtxDomainRangeEnd_impl_fntype nvtxDomainRangeEnd_impl_fnptr;
+    nvtxDomainRangePushEx_impl_fntype nvtxDomainRangePushEx_impl_fnptr;
+    nvtxDomainRangePop_impl_fntype nvtxDomainRangePop_impl_fnptr;
+    nvtxDomainResourceCreate_impl_fntype nvtxDomainResourceCreate_impl_fnptr;
+    nvtxDomainResourceDestroy_impl_fntype nvtxDomainResourceDestroy_impl_fnptr;
+    nvtxDomainNameCategoryA_impl_fntype nvtxDomainNameCategoryA_impl_fnptr;
+    nvtxDomainNameCategoryW_impl_fntype nvtxDomainNameCategoryW_impl_fnptr;
+    nvtxDomainRegisterStringA_impl_fntype nvtxDomainRegisterStringA_impl_fnptr;
+    nvtxDomainRegisterStringW_impl_fntype nvtxDomainRegisterStringW_impl_fnptr;
+    nvtxDomainCreateA_impl_fntype nvtxDomainCreateA_impl_fnptr;
+    nvtxDomainCreateW_impl_fntype nvtxDomainCreateW_impl_fnptr;
+    nvtxDomainDestroy_impl_fntype nvtxDomainDestroy_impl_fnptr;
+    nvtxInitialize_impl_fntype nvtxInitialize_impl_fnptr;
+    nvtxDomainSyncUserCreate_fakeimpl_fntype nvtxDomainSyncUserCreate_impl_fnptr;
+    nvtxDomainSyncUserDestroy_fakeimpl_fntype nvtxDomainSyncUserDestroy_impl_fnptr;
+    nvtxDomainSyncUserAcquireStart_fakeimpl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr;
+    nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr;
+    nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
+    nvtxDomainSyncUserReleasing_fakeimpl_fntype nvtxDomainSyncUserReleasing_impl_fnptr;
+    /* Tables of function pointers -- Extra null added to the end to ensure
+    *  a crash instead of silent corruption if a tool reads off the end.
+    *  Each table therefore has one slot more than its NVTX_CBID_*_SIZE. */
+    NvtxFunctionPointer* functionTable_CORE  [NVTX_CBID_CORE_SIZE   + 1];
+    NvtxFunctionPointer* functionTable_CUDA  [NVTX_CBID_CUDA_SIZE   + 1];
+    NvtxFunctionPointer* functionTable_OPENCL[NVTX_CBID_OPENCL_SIZE + 1];
+    NvtxFunctionPointer* functionTable_CUDART[NVTX_CBID_CUDART_SIZE + 1];
+    NvtxFunctionPointer* functionTable_CORE2 [NVTX_CBID_CORE2_SIZE  + 1];
+    NvtxFunctionPointer* functionTable_SYNC  [NVTX_CBID_SYNC_SIZE   + 1];
+} nvtxGlobals_t;
+#define NVTX_GLOBAL_TABLE_ENTRY(name) ( NVTX_REINTERPRET_CAST(NvtxFunctionPointer*, &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).name ## _impl_fnptr ) )
+NVTX_LINKONCE_DEFINE_GLOBAL nvtxGlobals_t NVTX_VERSIONED_IDENTIFIER(nvtxGlobals) =
+{
+    NVTX_INIT_STATE_FRESH,
+    {
+        sizeof(NvtxExportTableCallbacks),
+        NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)
+    },
+    {
+        sizeof(NvtxExportTableVersionInfo),
+        NVTX_VERSION,
+        0,
+        NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)
+    },
+    /* Implementation function pointers */
+    NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init),
+    NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init),
+    /* Tables of function pointers */
+    {
+        NVTX_NULLPTR,
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxMarkEx),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxMarkA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxMarkW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeStartEx),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeStartA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeStartW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangeEnd),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePushEx),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePushA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePushW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxRangePop),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCategoryA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCategoryW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameOsThreadA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameOsThreadW),
+        NVTX_NULLPTR
+    },
+    {
+        NVTX_NULLPTR,
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuDeviceA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuDeviceW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuContextA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuContextW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuStreamA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuStreamW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuEventA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCuEventW),
+        NVTX_NULLPTR
+    },
+    {
+        NVTX_NULLPTR,
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClDeviceA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClDeviceW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClContextA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClContextW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClCommandQueueA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClCommandQueueW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClMemObjectA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClMemObjectW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClSamplerA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClSamplerW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClProgramA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClProgramW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClEventA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameClEventW),
+        NVTX_NULLPTR
+    },
+    {
+        NVTX_NULLPTR,
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaDeviceA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaDeviceW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaStreamA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaStreamW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaEventA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxNameCudaEventW),
+        NVTX_NULLPTR
+    },
+    {
+        NVTX_NULLPTR,
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainMarkEx),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangeStartEx),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangeEnd),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangePushEx),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRangePop),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainResourceCreate),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainResourceDestroy),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainNameCategoryA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainNameCategoryW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRegisterStringA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainRegisterStringW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainCreateA),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainCreateW),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainDestroy),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxInitialize),
+        NVTX_NULLPTR
+    },
+    {
+        NVTX_NULLPTR,
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserCreate),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserDestroy),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserAcquireStart),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserAcquireFailed),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserAcquireSuccess),
+        NVTX_GLOBAL_TABLE_ENTRY(nvtxDomainSyncUserReleasing),
+        NVTX_NULLPTR
+    }
+};
+#undef NVTX_GLOBAL_TABLE_ENTRY
+/* ---- Define static inline implementations of core API functions ---- */
+#include "nvtxImplCore.h"
+/* ---- Define implementations of export table functions ---- */
+/* Export-table callback: selects the function-pointer table that belongs to
+ * callback_module inside the nvtxGlobals struct.
+ *
+ *   callback_module  module whose table is requested
+ *   out_table        if non-null, receives the address of the selected table
+ *   out_size         if non-null, receives the table's element count minus one
+ *                    (the array's extra trailing null slot is not counted)
+ *
+ * Returns 1 on success.  Returns 0, leaving both outputs untouched, when
+ * callback_module is not one of the six modules that own a table. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
+    NvtxCallbackModule callback_module,
+    NvtxFunctionTable* out_table,
+    unsigned int* out_size)
+{
+    const unsigned int slotBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NvtxFunctionPointer*));
+    NvtxFunctionTable selected = NVTX_NULLPTR;
+    unsigned int tableBytes = 0;
+    if (callback_module == NVTX_CB_MODULE_CORE)
+    {
+        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE;
+        tableBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE));
+    }
+    else if (callback_module == NVTX_CB_MODULE_CUDA)
+    {
+        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA;
+        tableBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA));
+    }
+    else if (callback_module == NVTX_CB_MODULE_OPENCL)
+    {
+        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL;
+        tableBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL));
+    }
+    else if (callback_module == NVTX_CB_MODULE_CUDART)
+    {
+        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART;
+        tableBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART));
+    }
+    else if (callback_module == NVTX_CB_MODULE_CORE2)
+    {
+        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2;
+        tableBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2));
+    }
+    else if (callback_module == NVTX_CB_MODULE_SYNC)
+    {
+        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC;
+        tableBytes = NVTX_STATIC_CAST(unsigned int, sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC));
+    }
+    else
+    {
+        /* NVTX_CB_MODULE_INVALID, NVTX_CB_MODULE_SIZE, NVTX_CB_MODULE_FORCE_INT
+         * and any other value: no table to report. */
+        return 0;
+    }
+    if (out_size != NVTX_NULLPTR)
+    {
+        *out_size = (tableBytes / slotBytes) - 1;
+    }
+    if (out_table != NVTX_NULLPTR)
+    {
+        *out_table = selected;
+    }
+    return 1;
+}
+/* Returns the address of the export table identified by exportTableId:
+ * the globals' etblCallbacks for NVTX_ETID_CALLBACKS, the globals'
+ * etblVersionInfo for NVTX_ETID_VERSIONINFO, and NVTX_NULLPTR for any
+ * other id. */
+NVTX_LINKONCE_DEFINE_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(uint32_t exportTableId)
+{
+    if (exportTableId == NVTX_ETID_CALLBACKS)
+    {
+        return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblCallbacks;
+    }
+    if (exportTableId == NVTX_ETID_VERSIONINFO)
+    {
+        return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblVersionInfo;
+    }
+    return NVTX_NULLPTR;
+}
+/* Export-table callback that receives a version number.  This default
+ * implementation deliberately ignores it; the hook is reserved for custom
+ * implementations that need to resolve problems with tools. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(uint32_t version)
+{
+    (void)version; /* unused on purpose */
+}
+/* ---- Define implementations of init versions of all API functions ---- */
+#include "nvtxInitDefs.h"
+/* ---- Define implementations of initialization functions ---- */
+#include "nvtxInit.h"
+#ifdef __GNUC__
+#pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCore.h ADDED Viewed

	@@ -0,0 +1,432 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+/* Dispatches eventAttrib to the handler stored in the globals'
+ * nvtxMarkEx_impl_fnptr slot.  Does nothing when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxMarkEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(eventAttrib);
+    }
+#else /* NVTX_DISABLE */
+    (void)eventAttrib;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches the char-string message to the handler stored in the globals'
+ * nvtxMarkA_impl_fnptr slot.  Does nothing when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxMarkA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches the wide-string message to the handler stored in the globals'
+ * nvtxMarkW_impl_fnptr slot.  Does nothing when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxMarkW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches eventAttrib to the handler stored in the globals'
+ * nvtxRangeStartEx_impl_fnptr slot and returns the handler's result.
+ * Returns a zero nvtxRangeId_t when that slot is null or when NVTX_DISABLE
+ * is defined. */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangeStartEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(eventAttrib);
+    }
+#else /* NVTX_DISABLE */
+    (void)eventAttrib;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
+}
+/* Dispatches the char-string message to the handler stored in the globals'
+ * nvtxRangeStartA_impl_fnptr slot and returns the handler's result.
+ * Returns a zero nvtxRangeId_t when that slot is null or when NVTX_DISABLE
+ * is defined. */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangeStartA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
+}
+/* Dispatches the wide-string message to the handler stored in the globals'
+ * nvtxRangeStartW_impl_fnptr slot and returns the handler's result.
+ * Returns a zero nvtxRangeId_t when that slot is null or when NVTX_DISABLE
+ * is defined. */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangeStartW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
+}
+/* Dispatches the range id to the handler stored in the globals'
+ * nvtxRangeEnd_impl_fnptr slot.  Does nothing when that slot is null or
+ * when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangeEnd_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(id);
+    }
+#else /* NVTX_DISABLE */
+    (void)id;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches eventAttrib to the handler stored in the globals'
+ * nvtxRangePushEx_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NO_PUSH_POP_TRACKING when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangePushEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(eventAttrib);
+    }
+#else /* NVTX_DISABLE */
+    (void)eventAttrib;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
+}
+/* Dispatches the char-string message to the handler stored in the globals'
+ * nvtxRangePushA_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NO_PUSH_POP_TRACKING when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangePushA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
+}
+/* Dispatches the wide-string message to the handler stored in the globals'
+ * nvtxRangePushW_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NO_PUSH_POP_TRACKING when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangePushW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
+}
+/* Calls the handler stored in the globals' nvtxRangePop_impl_fnptr slot
+ * and returns its result.  Returns NVTX_NO_PUSH_POP_TRACKING when that slot
+ * is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC int NVTX_API nvtxRangePop(void)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxRangePop_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)();
+    }
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
+}
+/* Dispatches (category, name) to the handler stored in the globals'
+ * nvtxNameCategoryA_impl_fnptr slot.  Does nothing when that slot is null
+ * or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCategoryA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(category, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)category;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (category, wide-string name) to the handler stored in the
+ * globals' nvtxNameCategoryW_impl_fnptr slot.  Does nothing when that slot
+ * is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCategoryW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(category, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)category;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (threadId, name) to the handler stored in the globals'
+ * nvtxNameOsThreadA_impl_fnptr slot.  Does nothing when that slot is null
+ * or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameOsThreadA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(threadId, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)threadId;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (threadId, wide-string name) to the handler stored in the
+ * globals' nvtxNameOsThreadW_impl_fnptr slot.  Does nothing when that slot
+ * is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameOsThreadW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(threadId, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)threadId;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (domain, eventAttrib) to the handler stored in the globals'
+ * nvtxDomainMarkEx_impl_fnptr slot.  Does nothing when that slot is null
+ * or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainMarkEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(domain, eventAttrib);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)eventAttrib;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (domain, eventAttrib) to the handler stored in the globals'
+ * nvtxDomainRangeStartEx_impl_fnptr slot and returns the handler's result.
+ * Returns a zero nvtxRangeId_t when that slot is null or when NVTX_DISABLE
+ * is defined. */
+NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainRangeStartEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(domain, eventAttrib);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)eventAttrib;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(nvtxRangeId_t, 0);
+}
+/* Dispatches (domain, id) to the handler stored in the globals'
+ * nvtxDomainRangeEnd_impl_fnptr slot.  Does nothing when that slot is null
+ * or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainRangeEnd_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(domain, id);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)id;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (domain, eventAttrib) to the handler stored in the globals'
+ * nvtxDomainRangePushEx_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NO_PUSH_POP_TRACKING when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainRangePushEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(domain, eventAttrib);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)eventAttrib;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
+}
+/* Dispatches domain to the handler stored in the globals'
+ * nvtxDomainRangePop_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NO_PUSH_POP_TRACKING when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainRangePop_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(domain);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+#endif /* NVTX_DISABLE */
+    return NVTX_STATIC_CAST(int, NVTX_NO_PUSH_POP_TRACKING);
+}
+/* Dispatches (domain, attribs) to the handler stored in the globals'
+ * nvtxDomainResourceCreate_impl_fnptr slot and returns the handler's
+ * result.  Returns NVTX_NULLPTR when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainResourceCreate_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(domain, attribs);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)attribs;
+#endif /* NVTX_DISABLE */
+    return NVTX_NULLPTR;
+}
+/* Dispatches resource to the handler stored in the globals'
+ * nvtxDomainResourceDestroy_impl_fnptr slot.  Does nothing when that slot
+ * is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainResourceDestroy_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(resource);
+    }
+#else /* NVTX_DISABLE */
+    (void)resource;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (domain, category, name) to the handler stored in the globals'
+ * nvtxDomainNameCategoryA_impl_fnptr slot.  Does nothing when that slot is
+ * null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainNameCategoryA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(domain, category, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)category;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (domain, category, wide-string name) to the handler stored in
+ * the globals' nvtxDomainNameCategoryW_impl_fnptr slot.  Does nothing when
+ * that slot is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainNameCategoryW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(domain, category, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)category;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (domain, string) to the handler stored in the globals'
+ * nvtxDomainRegisterStringA_impl_fnptr slot and returns the handler's
+ * result.  Returns NVTX_NULLPTR when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainRegisterStringA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(domain, string);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)string;
+#endif /* NVTX_DISABLE */
+    return NVTX_NULLPTR;
+}
+/* Dispatches (domain, wide string) to the handler stored in the globals'
+ * nvtxDomainRegisterStringW_impl_fnptr slot and returns the handler's
+ * result.  Returns NVTX_NULLPTR when that slot is null or when
+ * NVTX_DISABLE is defined. */
+NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainRegisterStringW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(domain, string);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+    (void)string;
+#endif /* NVTX_DISABLE */
+    return NVTX_NULLPTR;
+}
+/* Dispatches the char-string message to the handler stored in the globals'
+ * nvtxDomainCreateA_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NULLPTR when that slot is null or when NVTX_DISABLE is
+ * defined. */
+NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainCreateA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+    return NVTX_NULLPTR;
+}
+/* Dispatches the wide-string message to the handler stored in the globals'
+ * nvtxDomainCreateW_impl_fnptr slot and returns the handler's result.
+ * Returns NVTX_NULLPTR when that slot is null or when NVTX_DISABLE is
+ * defined. */
+NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainCreateW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        return (*impl)(message);
+    }
+#else /* NVTX_DISABLE */
+    (void)message;
+#endif /* NVTX_DISABLE */
+    return NVTX_NULLPTR;
+}
+/* Dispatches domain to the handler stored in the globals'
+ * nvtxDomainDestroy_impl_fnptr slot.  Does nothing when that slot is null
+ * or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainDestroy_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(domain);
+    }
+#else /* NVTX_DISABLE */
+    (void)domain;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches the reserved pointer to the handler stored in the globals'
+ * nvtxInitialize_impl_fnptr slot.  Does nothing when that slot is null or
+ * when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxInitialize_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(reserved);
+    }
+#else /* NVTX_DISABLE */
+    (void)reserved;
+#endif /* NVTX_DISABLE */
+}

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h ADDED Viewed

	@@ -0,0 +1,128 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD_CUDART
+#error Never include this file directly -- it is automatically included by nvToolsExtCudaRt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name); /* handler type nvtxNameCudaDeviceA dispatches through */
+typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name); /* handler type nvtxNameCudaDeviceW dispatches through */
+typedef void (NVTX_API * nvtxNameCudaStreamA_impl_fntype)(cudaStream_t stream, const char* name); /* handler type nvtxNameCudaStreamA casts its globals slot to */
+typedef void (NVTX_API * nvtxNameCudaStreamW_impl_fntype)(cudaStream_t stream, const wchar_t* name); /* handler type nvtxNameCudaStreamW casts its globals slot to */
+typedef void (NVTX_API * nvtxNameCudaEventA_impl_fntype)(cudaEvent_t event, const char* name); /* handler type nvtxNameCudaEventA casts its globals slot to */
+typedef void (NVTX_API * nvtxNameCudaEventW_impl_fntype)(cudaEvent_t event, const wchar_t* name); /* handler type nvtxNameCudaEventW casts its globals slot to */
+/* Dispatches (device, name) to the handler stored in the globals'
+ * nvtxNameCudaDeviceA_impl_fnptr slot.  Does nothing when that slot is null
+ * or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCudaDeviceA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(device, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)device;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (device, wide-string name) to the handler stored in the
+ * globals' nvtxNameCudaDeviceW_impl_fnptr slot.  Does nothing when that
+ * slot is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCudaDeviceW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(device, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)device;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (stream, name) to the handler stored in the globals'
+ * nvtxNameCudaStreamA_impl_fnptr slot; the stored pointer is cast to
+ * nvtxNameCudaStreamA_impl_fntype before use.  Does nothing when the slot
+ * is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCudaStreamA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCudaStreamA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr);
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(stream, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)stream;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (stream, wide-string name) to the handler stored in the
+ * globals' nvtxNameCudaStreamW_impl_fnptr slot; the stored pointer is cast
+ * to nvtxNameCudaStreamW_impl_fntype before use.  Does nothing when the
+ * slot is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCudaStreamW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCudaStreamW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr);
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(stream, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)stream;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (event, name) to the handler stored in the globals'
+ * nvtxNameCudaEventA_impl_fnptr slot; the stored pointer is cast to
+ * nvtxNameCudaEventA_impl_fntype before use.  Does nothing when the slot is
+ * null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCudaEventA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCudaEventA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr);
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(event, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)event;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Dispatches (event, wide-string name) to the handler stored in the
+ * globals' nvtxNameCudaEventW_impl_fnptr slot; the stored pointer is cast
+ * to nvtxNameCudaEventW_impl_fntype before use.  Does nothing when the slot
+ * is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCudaEventW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCudaEventW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr);
+    if (impl != NVTX_NULLPTR)
+    {
+        (*impl)(event, name);
+    }
+#else /* NVTX_DISABLE */
+    (void)event;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h ADDED Viewed

	@@ -0,0 +1,156 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD_CUDA
+#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+typedef void (NVTX_API * nvtxNameCuDeviceA_impl_fntype)(CUdevice device, const char* name); /* handler type nvtxNameCuDeviceA dispatches through */
+typedef void (NVTX_API * nvtxNameCuDeviceW_impl_fntype)(CUdevice device, const wchar_t* name); /* handler type nvtxNameCuDeviceW dispatches through */
+typedef void (NVTX_API * nvtxNameCuContextA_impl_fntype)(CUcontext context, const char* name); /* handler type nvtxNameCuContextA casts its globals slot to */
+typedef void (NVTX_API * nvtxNameCuContextW_impl_fntype)(CUcontext context, const wchar_t* name); /* handler type nvtxNameCuContextW casts its globals slot to */
+typedef void (NVTX_API * nvtxNameCuStreamA_impl_fntype)(CUstream stream, const char* name); /* presumably the handler type for nvtxNameCuStreamA -- its body is outside this view */
+typedef void (NVTX_API * nvtxNameCuStreamW_impl_fntype)(CUstream stream, const wchar_t* name); /* presumably the handler type for nvtxNameCuStreamW -- confirm */
+typedef void (NVTX_API * nvtxNameCuEventA_impl_fntype)(CUevent event, const char* name); /* presumably the handler type for nvtxNameCuEventA -- confirm */
+typedef void (NVTX_API * nvtxNameCuEventW_impl_fntype)(CUevent event, const wchar_t* name); /* presumably the handler type for nvtxNameCuEventW -- confirm */
+/* Forwards (device, name) to the nvtxNameCuDeviceA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    /* NOTE(review): no NVTX_REINTERPRET_CAST here, unlike the context/stream/event
+    *  wrappers -- presumably the stored pointer already has this type; confirm
+    *  against the declaration of nvtxGlobals. */
+    nvtxNameCuDeviceA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(device, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)device;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameCuDeviceA: forwards (device, name) to
+*  the nvtxNameCuDeviceW implementation stored in nvtxGlobals.  Does nothing
+*  when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    /* NOTE(review): no NVTX_REINTERPRET_CAST here, unlike the context/stream/event
+    *  wrappers -- presumably the stored pointer already has this type; confirm
+    *  against the declaration of nvtxGlobals. */
+    nvtxNameCuDeviceW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(device, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)device;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (context, name) to the nvtxNameCuContextA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCuContextA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCuContextA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(context, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)context;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameCuContextA: forwards (context, name)
+*  to the nvtxNameCuContextW implementation stored in nvtxGlobals.  Does
+*  nothing when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCuContextW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCuContextW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(context, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)context;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (stream, name) to the nvtxNameCuStreamA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCuStreamA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCuStreamA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(stream, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)stream;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameCuStreamA: forwards (stream, name) to
+*  the nvtxNameCuStreamW implementation stored in nvtxGlobals.  Does nothing
+*  when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCuStreamW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCuStreamW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(stream, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)stream;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (event, name) to the nvtxNameCuEventA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCuEventA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCuEventA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(event, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)event;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameCuEventA: forwards (event, name) to
+*  the nvtxNameCuEventW implementation stored in nvtxGlobals.  Does nothing
+*  when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameCuEventW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameCuEventW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(event, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)event;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h ADDED Viewed

	@@ -0,0 +1,239 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD_OPENCL
+#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* Function-pointer types for the OpenCL resource-naming implementations.  Each
+*  nvtxNameCl* wrapper below casts the matching *_impl_fnptr member of
+*  nvtxGlobals to the corresponding type before calling it.  "A" variants take
+*  a const char* name, "W" variants take a const wchar_t* name. */
+typedef void (NVTX_API * nvtxNameClDeviceA_impl_fntype)(cl_device_id device, const char* name);
+typedef void (NVTX_API * nvtxNameClDeviceW_impl_fntype)(cl_device_id device, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClContextA_impl_fntype)(cl_context context, const char* name);
+typedef void (NVTX_API * nvtxNameClContextW_impl_fntype)(cl_context context, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClCommandQueueA_impl_fntype)(cl_command_queue command_queue, const char* name);
+typedef void (NVTX_API * nvtxNameClCommandQueueW_impl_fntype)(cl_command_queue command_queue, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClMemObjectA_impl_fntype)(cl_mem memobj, const char* name);
+typedef void (NVTX_API * nvtxNameClMemObjectW_impl_fntype)(cl_mem memobj, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClSamplerA_impl_fntype)(cl_sampler sampler, const char* name);
+typedef void (NVTX_API * nvtxNameClSamplerW_impl_fntype)(cl_sampler sampler, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClProgramA_impl_fntype)(cl_program program, const char* name);
+typedef void (NVTX_API * nvtxNameClProgramW_impl_fntype)(cl_program program, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClEventA_impl_fntype)(cl_event evnt, const char* name);
+typedef void (NVTX_API * nvtxNameClEventW_impl_fntype)(cl_event evnt, const wchar_t* name);
+/* Forwards (device, name) to the nvtxNameClDeviceA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClDeviceA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClDeviceA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(device, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)device;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClDeviceA: forwards (device, name) to
+*  the nvtxNameClDeviceW implementation stored in nvtxGlobals.  Does nothing
+*  when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClDeviceW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClDeviceW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(device, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)device;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (context, name) to the nvtxNameClContextA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClContextA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClContextA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(context, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)context;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClContextA: forwards (context, name)
+*  to the nvtxNameClContextW implementation stored in nvtxGlobals.  Does
+*  nothing when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClContextW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClContextW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(context, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)context;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (command_queue, name) to the nvtxNameClCommandQueueA implementation
+*  stored in nvtxGlobals.  Does nothing when that pointer is null or when
+*  NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClCommandQueueA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClCommandQueueA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(command_queue, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)command_queue;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClCommandQueueA: forwards
+*  (command_queue, name) to the nvtxNameClCommandQueueW implementation stored
+*  in nvtxGlobals.  Does nothing when that pointer is null or when
+*  NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClCommandQueueW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClCommandQueueW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(command_queue, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)command_queue;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (memobj, name) to the nvtxNameClMemObjectA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClMemObjectA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClMemObjectA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(memobj, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)memobj;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClMemObjectA: forwards (memobj, name)
+*  to the nvtxNameClMemObjectW implementation stored in nvtxGlobals.  Does
+*  nothing when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClMemObjectW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClMemObjectW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(memobj, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)memobj;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (sampler, name) to the nvtxNameClSamplerA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClSamplerA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClSamplerA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(sampler, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)sampler;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClSamplerA: forwards (sampler, name)
+*  to the nvtxNameClSamplerW implementation stored in nvtxGlobals.  Does
+*  nothing when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClSamplerW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClSamplerW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(sampler, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)sampler;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (program, name) to the nvtxNameClProgramA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClProgramA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClProgramA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(program, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)program;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClProgramA: forwards (program, name)
+*  to the nvtxNameClProgramW implementation stored in nvtxGlobals.  Does
+*  nothing when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClProgramW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClProgramW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(program, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)program;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards (evnt, name) to the nvtxNameClEventA implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClEventA_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClEventA_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(evnt, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)evnt;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+/* Wide-character counterpart of nvtxNameClEventA: forwards (evnt, name) to
+*  the nvtxNameClEventW implementation stored in nvtxGlobals.  Does nothing
+*  when that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxNameClEventW_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxNameClEventW_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(evnt, name);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)evnt;
+    (void)name;
+#endif /* NVTX_DISABLE */
+}
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h ADDED Viewed

	@@ -0,0 +1,124 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD_SYNC
+#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/* Function-pointer types for the user-sync implementations.  Each
+*  nvtxDomainSyncUser* wrapper below casts the matching *_impl_fnptr member of
+*  nvtxGlobals to the corresponding type before calling it.  Only the Create
+*  variant returns a value (an nvtxSyncUser_t handle); the others return void. */
+typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
+typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
+/* Forwards (domain, attribs) to the nvtxDomainSyncUserCreate implementation
+*  stored in nvtxGlobals and returns its result.  Returns NVTX_NULLPTR when
+*  that pointer is null or when NVTX_DISABLE is defined. */
+NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainSyncUserCreate_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserCreate_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr);
+    if (impl != NVTX_NULLPTR)
+    {
+        return impl(domain, attribs);
+    }
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)domain;
+    (void)attribs;
+#endif /* NVTX_DISABLE */
+    /* No implementation available (or NVTX compiled out) */
+    return NVTX_NULLPTR;
+}
+/* Forwards handle to the nvtxDomainSyncUserDestroy implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainSyncUserDestroy_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserDestroy_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(handle);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)handle;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards handle to the nvtxDomainSyncUserAcquireStart implementation stored
+*  in nvtxGlobals.  Does nothing when that pointer is null or when
+*  NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainSyncUserAcquireStart_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserAcquireStart_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(handle);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)handle;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards handle to the nvtxDomainSyncUserAcquireFailed implementation stored
+*  in nvtxGlobals.  Does nothing when that pointer is null or when
+*  NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainSyncUserAcquireFailed_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserAcquireFailed_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(handle);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)handle;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards handle to the nvtxDomainSyncUserAcquireSuccess implementation
+*  stored in nvtxGlobals.  Does nothing when that pointer is null or when
+*  NVTX_DISABLE is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainSyncUserAcquireSuccess_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserAcquireSuccess_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(handle);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)handle;
+#endif /* NVTX_DISABLE */
+}
+/* Forwards handle to the nvtxDomainSyncUserReleasing implementation stored in
+*  nvtxGlobals.  Does nothing when that pointer is null or when NVTX_DISABLE
+*  is defined. */
+NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle)
+{
+    NVTX_SET_NAME_MANGLING_OPTIONS
+#ifndef NVTX_DISABLE
+    nvtxDomainSyncUserReleasing_impl_fntype impl = NVTX_REINTERPRET_CAST(nvtxDomainSyncUserReleasing_impl_fntype, NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr);
+    if (impl == NVTX_NULLPTR)
+    {
+        return;
+    }
+    impl(handle);
+#else /* NVTX_DISABLE */
+    /* Silence unused-parameter warnings in the compiled-out build */
+    (void)handle;
+#endif /* NVTX_DISABLE */
+}
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInit.h ADDED Viewed

	@@ -0,0 +1,468 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+/* ---- Platform-independent helper definitions and functions ---- */
+/* Prefer macros over inline functions to reduce symbol resolution at link time */
+#if defined(_WIN32)
+#define NVTX_PATHCHAR   wchar_t
+#define NVTX_STR(x)     L##x
+#define NVTX_GETENV     _wgetenv
+#define NVTX_BUFSIZE    16384
+#define NVTX_DLLHANDLE  HMODULE
+#define NVTX_DLLOPEN(x) LoadLibraryW(x)
+#define NVTX_DLLFUNC(h, x) NVTX_REINTERPRET_CAST(void(*)(void), GetProcAddress((h), (x)))
+#define NVTX_DLLCLOSE   FreeLibrary
+#define NVTX_DLLDEFAULT NVTX_NULLPTR
+#define NVTX_YIELD()    SwitchToThread()
+#define NVTX_MEMBAR()   MemoryBarrier()
+#define NVTX_ATOMIC_WRITE_32(address, value) \
+    InterlockedExchange(NVTX_REINTERPRET_CAST(volatile LONG*, (address)), (value))
+#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) \
+    (old) = InterlockedCompareExchange(NVTX_REINTERPRET_CAST(volatile LONG*, (address)), (exchange), (comparand))
+#elif defined(__GNUC__)
+#define NVTX_PATHCHAR   char
+#define NVTX_STR(x)     x
+#define NVTX_GETENV     getenv
+#define NVTX_BUFSIZE    16384
+#define NVTX_DLLHANDLE  void*
+#define NVTX_DLLOPEN(x) dlopen(x, RTLD_LAZY)
+#define NVTX_DLLFUNC(h, x) dlsym((h), (x))
+#define NVTX_DLLCLOSE   dlclose
+#if !defined(__APPLE__)
+#define NVTX_DLLDEFAULT NVTX_NULLPTR
+#else
+#define NVTX_DLLDEFAULT RTLD_DEFAULT
+#endif
+#define NVTX_YIELD()    sched_yield()
+#define NVTX_MEMBAR()   __sync_synchronize()
+/* Ensure full memory barrier for atomics, to match Windows functions */
+#define NVTX_ATOMIC_WRITE_32(address, value) \
+    __sync_synchronize(); *(address) = (value); __sync_synchronize()
+#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) \
+    (old) = __sync_val_compare_and_swap((address), (comparand), (exchange))
+#else
+#error The library does not support your configuration!
+#endif
+/* NVTX_LOAD_SEQUENCE_VERSION macro
+*
+*  NVTX3 can update the search sequence used for finding a suitable injection library.
+*  If multiple copies of the NVTX3 headers are included in the same translation unit,
+*  the one included first sets the loader sequence.  If there is any problem where a
+*  tool is expected to load, but is not loading, the app can test this macro to verify
+*  which version of the search is being used.  Check if NVTX_LOAD_SEQUENCE_VERSION is
+*  defined; if it is not, the version is 1.  Otherwise, the version is indicated by
+*  the value of NVTX_LOAD_SEQUENCE_VERSION.
+*
+*  Version history:
+*    1: NVTX3 initial implementation.  The search continues until a usable function
+*       pointer is found.  If none is found, init aborts and rolls back anything it
+*       did during the search (e.g. any loaded libraries are unloaded).  If a non-zero
+*       function pointer is found, it is called.  If that function returns non-zero
+*       ("true" in C), that indicates a tool successfully initialized.  If it returns
+*       zero ("false"), the tool init was unsuccessful, so init aborts and rolls back
+*       anything it did.  No further attempt is made to search for a different init
+*       function if the first one found returns false.  The search order is:
+*       - Check for env var NVTX_INJECTION64_PATH (or "32" in 32-bit process)
+*         - Treat env var value as path to dynamic library, try loading it
+*         - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
+*         - If this returns a non-null pointer, the search finishes here
+*       - (Android only) Look for libNvtxInjection64.so (or "32" in 32-bit process)
+*         - Must be in the "/data/data/<package name>/files" directory
+*         - Treat that file's path as path to dynamic library, try loading it
+*         - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
+*         - If this returns a non-null pointer, the search finishes here
+*       Note: There were two other options partially implemented, but disabled.
+*       - For supporting a pre-injected library on POSIX platforms, e.g. with
+*         LD_PRELOAD, try using dlsym with a null module handle to get the init
+*         function.  This was unconditionally disabled after finding cases where
+*         a tool loaded multiple injections that supported NVTX, and couldn't
+*         control which one was getting picked by the NVTX loader.
+*       - (Linux only, not including Cygwin) Check for static injection using a
+*         weak symbol.  This was implemented incorrectly, so it wasn't usable.
+*
+*    2: Fix the support for static injection libraries.  This is meant for cases
+*       where dlopen is not supported or allowed, and the executable format has
+*       support for weak symbols.  Tools may provide a static library with a
+*       C-linkage symbol named "InitializeInjectionNvtx2_fnptr", whose type is
+*       NvtxInitializeInjectionNvtxFunc_t, i.e. a function pointer to NVTX init
+*       function.  If such a symbol is provided by a static library, the NVTX
+*       loader's weak symbol will bind to it and call it for initialization.
+*       Otherwise, the weak symbol will be defined by NVTX and default to null,
+*       indicating no static injection library is present.  Static injection is
+*       last in the load sequence, because it gives all the run-time methods of
+*       injection to override a program's compiled-in tool without rebuilding the
+*       program.  The search order is:
+*       - Check for env var NVTX_INJECTION64_PATH (or "32" in 32-bit process)
+*         - Treat env var value as path to dynamic library, try loading it
+*         - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
+*         - If this returns a non-null pointer, the search finishes here
+*       - (Android only) Look for libNvtxInjection64.so (or "32" in 32-bit process)
+*         - Must be in the "/data/data/<package name>/files" directory
+*         - Treat that file's path as path to dynamic library, try loading it
+*         - If it loads, try to get the exported symbol "InitializeInjectionNvtx2"
+*         - If this returns a non-null pointer, the search finishes here
+*       - (Currently disabled, experimental support for non-Windows) Use dlsym
+*         with a null module handle to query the process-wide dynamic symbol
+*         table for a function named "InitializeInjectionNvtx2Preinject".  The
+*         symbol is different to prevent injections from being loaded this way
+*         unless they choose to do so.
+*         - If this returns a non-null pointer, the search finishes here
+*       - (GCC-like compilers with ELF binary targets only) Check for static
+*         injection using a weak symbol "InitializeInjectionNvtx2_fnptr".
+*       If the default support choices in this header are not working as expected,
+*       clients may now override load sequence support decisions by defining these
+*       macros before including the NVTX header files:
+*       - NVTX_SUPPORT_ENV_VARS
+*       - NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
+*       - NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
+*       - NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+*       - NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
+*/
+#define NVTX_LOAD_SEQUENCE_VERSION 2
+/* Defaults for the NVTX_SUPPORT_* load-sequence switches.  Each one is only
+*  defined here if the client has not already defined it before including the
+*  NVTX headers. */
+#ifndef NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+/* Define this to 1 for platforms where pre-injected libraries can be discovered. */
+#if defined(_WIN32)
+/* Windows has no process-wide table of dynamic library symbols, so this can't be supported. */
+#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
+#else
+/* POSIX platforms allow calling dlsym on a null module to use the process-wide table.
+*  Note: Still disabled in load sequence version 2.  Needs to support following the
+*  RTLD_NEXT chain, and needs more testing before support can be enabled by default.*/
+#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
+#endif
+#endif
+#ifndef NVTX_SUPPORT_ENV_VARS
+/* Define this to 1 for platforms that support environment variables */
+/* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */
+/* Try:  #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
+#define NVTX_SUPPORT_ENV_VARS 1
+#endif
+#ifndef NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
+/* Define this to 1 for platforms that support dynamic/shared libraries */
+#define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
+#endif
+#ifndef NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
+/* Defaults to 1 only when compiling for Android (__ANDROID__ defined) */
+#if defined(__ANDROID__)
+#define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 1
+#else
+#define NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE 0
+#endif
+#endif
+#ifndef NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
+/* On platforms that support weak symbols (i.e. non-Windows), injection libraries may
+*  be statically linked into an application.  This is useful for platforms where dynamic
+*  injection is not available.  Weak symbols not marked extern are definitions, not just
+*  declarations.  They are guaranteed to be initialized to zero if no normal definitions
+*  are found by the linker to override them.  This means the NVTX load sequence can safely
+*  detect the presence of a static injection -- if InitializeInjectionNvtx2_fnptr is zero,
+*  there is no static injection. */
+#if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
+#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
+#else
+#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
+#endif
+#endif
+/* Weak definition of the static-injection hook.  It is emitted only when
+*  static injection is supported and NVTX_STATIC_INJECTION_IMPL is not defined. */
+#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY && !defined(NVTX_STATIC_INJECTION_IMPL)
+/* To make an NVTX injection library support static injection, it must do these things:
+*  - Define InitializeInjectionNvtx2_fnptr as a normal symbol (not weak), pointing to
+*    the implementation of InitializeInjectionNvtx2 (which does not need to be a
+*    dynamic export if only supporting static injection).
+*  - Define NVTX_STATIC_INJECTION_IMPL so the weak definition below is skipped.
+*  - Compile the static injection files with -fPIC if they are to be linked with other
+*    files compiled this way.  If you forget this, GCC will simply tell you to add it.
+*  When building the application, there are a few ways to link in a static injection:
+*  - Compile the injection's source files normally, and include the .o files as inputs
+*    to the linker.
+*  - If the injection is provided as an archive (.a file), it will not resolve any
+*    unresolved symbols, so the linker will skip it by default.  This can be fixed
+*    by wrapping the static injection's name on the linker command line with options
+*    to treat it differently.  For example:
+*      gcc example.o libfoo.a -Wl,--whole-archive libinj-static.a -Wl,--no-whole-archive libbar.a
+*    Note that libinj-static.a is bracketed by options to turn on "whole archive" and
+*    then back off again afterwards, so libfoo.a and libbar.a are linked normally.
+*  - In CMake, a static injection can be added with options like this:
+*      target_link_libraries(app PRIVATE -Wl,--whole-archive inj-static -Wl,--no-whole-archive)
+*/
+__attribute__((weak)) NvtxInitializeInjectionNvtxFunc_t InitializeInjectionNvtx2_fnptr;
+#endif
+/* This function tries to find or load an NVTX injection library and get the
+*  address of its initialization function (InitializeInjectionNvtx2, or
+*  InitializeInjectionNvtx2Preinject for a pre-injected library).  If such a
+*  function pointer is found, it is called, and passed the address of this NVTX
+*  instance's nvtxGetExportTable function, so the injection can attach to this
+*  instance.
+*  If the initialization fails for any reason, any dynamic library loaded will
+*  be freed, and all NVTX implementation functions will be set to no-ops.  If
+*  initialization succeeds, NVTX functions not attached to the tool will be set
+*  to no-ops.  This is implemented as one function instead of several small
+*  functions to minimize the number of weak symbols the linker must resolve.
+*  Order of search, as implemented below (each step is only attempted when the
+*  corresponding NVTX_SUPPORT_* option is enabled and no init function has
+*  been found yet):
+*  - Loadable library exporting InitializeInjectionNvtx2
+*      - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
+*      - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
+*  - Pre-injected library exporting InitializeInjectionNvtx2Preinject
+*  - Statically-linked injection library defining InitializeInjectionNvtx2_fnptr
+*  If a loadable library path was determined but the library cannot be loaded
+*  or lacks the entry point, the function returns an error immediately and the
+*  later steps are not tried.
+*  Returns NVTX_SUCCESS if an init function was found and returned non-zero.
+*  Otherwise returns one of:
+*  - NVTX_ERR_INIT_ACCESS_LIBRARY: Android package path could not be built or accessed
+*  - NVTX_ERR_INIT_LOAD_LIBRARY: NVTX_DLLOPEN failed on the library path
+*  - NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT: loaded library lacks InitializeInjectionNvtx2
+*  - NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE: no init function found by any step
+*  - NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT: the init function returned 0
+*/
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void);
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void)
+{
+    static const char initFuncName[] = "InitializeInjectionNvtx2";
+#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+    static const char initFuncPreinjectName[] = "InitializeInjectionNvtx2Preinject";
+#endif
+    NvtxInitializeInjectionNvtxFunc_t init_fnptr = NVTX_NULLPTR;
+    NVTX_DLLHANDLE injectionLibraryHandle = NVTX_DLLDEFAULT; /* Overwritten with the NVTX_DLLOPEN result if a library is loaded */
+    int entryPointStatus = 0;
+#if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
+    /* Try discovering dynamic injection library to load */
+    {
+#if NVTX_SUPPORT_ENV_VARS
+        /* If env var NVTX_INJECTION64_PATH is set, it should contain the path
+        *  to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
+        const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
+            ? NVTX_STR("NVTX_INJECTION32_PATH")
+            : NVTX_STR("NVTX_INJECTION64_PATH");
+#endif /* NVTX_SUPPORT_ENV_VARS */
+        NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
+        const NVTX_PATHCHAR* injectionLibraryPath = NVTX_NULLPTR;
+        /* Refer to this variable explicitly in case all references to it are #if'ed out */
+        (void)injectionLibraryPathBuf;
+#if NVTX_SUPPORT_ENV_VARS
+        /* Disable the warning for getenv & _wgetenv -- this usage is safe because
+        *  these functions are not called again before using the returned value. */
+#if defined(_MSC_VER)
+#pragma warning( push )
+#pragma warning( disable : 4996 )
+#endif
+        injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
+#if defined(_MSC_VER)
+#pragma warning( pop )
+#endif
+#endif
+#if NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE
+        if (!injectionLibraryPath)
+        {
+            const char *bits = (sizeof(void*) == 4) ? "32" : "64";
+            char cmdlineBuf[32];
+            char pkgName[PATH_MAX];
+            int count;
+            int pid;
+            FILE *fp;
+            size_t bytesRead;
+            size_t pos;
+            pid = NVTX_STATIC_CAST(int, getpid());
+            count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
+            if (count <= 0 || count >= NVTX_STATIC_CAST(int, sizeof(cmdlineBuf)))
+            {
+                NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            fp = fopen(cmdlineBuf, "r");
+            if (!fp)
+            {
+                NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
+            fclose(fp);
+            if (bytesRead == 0)
+            {
+                NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            pkgName[bytesRead] = 0; /* In bounds: at most sizeof(pkgName) - 1 bytes were read */
+            /* String can contain colon as a process separator. In this case the package name is before the colon. */
+            pos = 0;
+            while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
+            {
+                ++pos;
+            }
+            pkgName[pos] = 0; /* Truncate at the first ':' or NUL found (or at bytesRead if neither) */
+            count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
+            if (count <= 0 || count >= NVTX_BUFSIZE)
+            {
+                NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
+                return NVTX_ERR_INIT_ACCESS_LIBRARY;
+            }
+            /* On Android, verify path is accessible due to aggressive file access restrictions. */
+            /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
+            /* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
+            if (injectionLibraryPathBuf[0] == '/')
+            {
+#if (__ANDROID_API__ < 21)
+                int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
+#else
+                int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
+#endif
+                if (access_err != 0)
+                {
+                    NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
+                    return NVTX_ERR_INIT_ACCESS_LIBRARY;
+                }
+            }
+            injectionLibraryPath = injectionLibraryPathBuf;
+        }
+#endif /* NVTX_SUPPORT_ANDROID_INJECTION_LIBRARY_IN_PACKAGE */
+        /* At this point, injectionLibraryPath is specified if a dynamic
+        *  injection library was specified by a tool. */
+        if (injectionLibraryPath)
+        {
+            /* Load the injection library */
+            injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
+            if (!injectionLibraryHandle)
+            {
+                NVTX_ERR("Failed to load injection library\n");
+                return NVTX_ERR_INIT_LOAD_LIBRARY;
+            }
+            else
+            {
+                /* Attempt to get the injection library's entry-point */
+                init_fnptr = NVTX_REINTERPRET_CAST(NvtxInitializeInjectionNvtxFunc_t, NVTX_DLLFUNC(injectionLibraryHandle, initFuncName));
+                if (!init_fnptr)
+                {
+                    NVTX_DLLCLOSE(injectionLibraryHandle);
+                    NVTX_ERR("Failed to get address of function InitializeInjectionNvtx2 from injection library\n");
+                    return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
+                }
+            }
+        }
+    }
+#endif /* NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY */
+#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
+    if (!init_fnptr)
+    {
+        /* Use POSIX global symbol chain to query for init function from any module.
+        *  Note the symbol looked up here is InitializeInjectionNvtx2Preinject, not
+        *  the InitializeInjectionNvtx2 name used for a loaded library above. */
+        init_fnptr = NVTX_REINTERPRET_CAST(NvtxInitializeInjectionNvtxFunc_t, NVTX_DLLFUNC(NVTX_DLLDEFAULT, initFuncPreinjectName));
+    }
+#endif
+#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
+    if (!init_fnptr)
+    {
+        /* Check weakly-defined function pointer.  A statically-linked injection can define this
+        *  as a normal symbol and set it to the address of the NVTX init function -- this will
+        *  provide a non-null value here.  If there is no other definition of this symbol, it
+        *  will be null here. */
+        if (InitializeInjectionNvtx2_fnptr)
+        {
+            init_fnptr = InitializeInjectionNvtx2_fnptr;
+        }
+    }
+#endif
+    /* At this point, if init_fnptr is not set, then no tool has specified
+    *  an NVTX injection library -- return non-success result so all NVTX
+    *  API functions will be set to no-ops. */
+    if (!init_fnptr)
+    {
+        return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
+    }
+    /* Invoke injection library's initialization function.  If it returns
+    *  0 (failure) and a dynamic injection was loaded, unload it.
+    *  NOTE(review): the handle test below assumes NVTX_DLLDEFAULT (the initial
+    *  value of injectionLibraryHandle) evaluates as false, so that nothing is
+    *  closed when no library was opened -- confirm against its definition. */
+    entryPointStatus = init_fnptr(NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable));
+    if (entryPointStatus == 0)
+    {
+        NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
+        if (injectionLibraryHandle)
+        {
+            NVTX_DLLCLOSE(injectionLibraryHandle);
+        }
+        return NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT;
+    }
+    return NVTX_SUCCESS;
+}
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void)
+{   /* Runs the injection-library initialization at most once for this
+    *  nvtxGlobals instance.  The caller that observes initState as
+    *  NVTX_INIT_STATE_FRESH performs the initialization and then publishes
+    *  NVTX_INIT_STATE_COMPLETE; any other caller waits until initState reads
+    *  NVTX_INIT_STATE_COMPLETE before returning. */
+    unsigned int old;
+    if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState == NVTX_INIT_STATE_COMPLETE)
+    {
+        return; /* Initialization already finished -- nothing to do */
+    }   /* NOTE(review): presumably NVTX_ATOMIC_CAS_32 stores
+        *  NVTX_INIT_STATE_STARTED into initState only when it currently holds
+        *  NVTX_INIT_STATE_FRESH, and leaves the prior value in 'old' --
+        *  confirm against the macro definition. */
+    NVTX_ATOMIC_CAS_32(
+        old,
+        &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
+        NVTX_INIT_STATE_STARTED,
+        NVTX_INIT_STATE_FRESH);
+    if (old == NVTX_INIT_STATE_FRESH)
+    {
+        int result;
+        int forceAllToNoops;
+        /* Load & initialize injection library -- it will assign the function pointers */
+        result = NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)();
+        /* Set all pointers not assigned by the injection to null */
+        forceAllToNoops = result != NVTX_SUCCESS; /* Set all to null if injection init failed */
+        NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(forceAllToNoops);
+        /* Signal that initialization has finished, so now the assigned function pointers will be used */
+        NVTX_ATOMIC_WRITE_32(
+            &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
+            NVTX_INIT_STATE_COMPLETE);
+    }
+    else /* Spin-wait until initialization has finished */
+    {
+        NVTX_MEMBAR();
+        while (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState != NVTX_INIT_STATE_COMPLETE)
+        {
+            NVTX_YIELD(); /* Yield between polls of initState */
+            NVTX_MEMBAR();
+        }
+    }
+}

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDecls.h ADDED Viewed

	@@ -0,0 +1,103 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved);
+NVTX_LINKONCE_FWDDECL_FUNCTION nvtx_nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtx_nvtxSyncUser_t handle);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtx_nvtxSyncUser_t handle);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtx_nvtxSyncUser_t handle);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtx_nvtxSyncUser_t handle);
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtx_nvtxSyncUser_t handle);

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxInitDefs.h ADDED Viewed

	@@ -0,0 +1,595 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#ifndef NVTX_IMPL_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+/* Runs nvtxInitOnce(), then forwards the call to nvtxMarkEx(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxMarkEx(eventAttrib);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxMarkA(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxMarkA(message);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxMarkW(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxMarkW(message);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangeStartEx() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib)
+{
+    nvtxRangeId_t rangeId;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    rangeId = nvtxRangeStartEx(eventAttrib);
+    return rangeId;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangeStartA() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message)
+{
+    nvtxRangeId_t rangeId;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    rangeId = nvtxRangeStartA(message);
+    return rangeId;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangeStartW() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message)
+{
+    nvtxRangeId_t rangeId;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    rangeId = nvtxRangeStartW(message);
+    return rangeId;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangeEnd(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxRangeEnd(id);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangePushEx() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib)
+{
+    int pushResult;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    pushResult = nvtxRangePushEx(eventAttrib);
+    return pushResult;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangePushA() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message)
+{
+    int pushResult;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    pushResult = nvtxRangePushA(message);
+    return pushResult;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangePushW() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message)
+{
+    int pushResult;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    pushResult = nvtxRangePushW(message);
+    return pushResult;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxRangePop() and returns
+*  its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void)
+{
+    int popResult;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    popResult = nvtxRangePop();
+    return popResult;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxNameCategoryA(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxNameCategoryA(category, name);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxNameCategoryW(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxNameCategoryW(category, name);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxNameOsThreadA(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxNameOsThreadA(threadId, name);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxNameOsThreadW(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxNameOsThreadW(threadId, name);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainMarkEx(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxDomainMarkEx(domain, eventAttrib);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainRangeStartEx() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
+{
+    nvtxRangeId_t rangeId;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    rangeId = nvtxDomainRangeStartEx(domain, eventAttrib);
+    return rangeId;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainRangeEnd(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxDomainRangeEnd(domain, id);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainRangePushEx() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
+{
+    int pushResult;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    pushResult = nvtxDomainRangePushEx(domain, eventAttrib);
+    return pushResult;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainRangePop() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain)
+{
+    int popResult;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    popResult = nvtxDomainRangePop(domain);
+    return popResult;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainResourceCreate()
+*  and returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
+{
+    nvtxResourceHandle_t resourceHandle;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    resourceHandle = nvtxDomainResourceCreate(domain, attribs);
+    return resourceHandle;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainResourceDestroy(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxDomainResourceDestroy(resource);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainNameCategoryA(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxDomainNameCategoryA(domain, category, name);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainNameCategoryW(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxDomainNameCategoryW(domain, category, name);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainRegisterStringA()
+*  and returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string)
+{
+    nvtxStringHandle_t stringHandle;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    stringHandle = nvtxDomainRegisterStringA(domain, string);
+    return stringHandle;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainRegisterStringW()
+*  and returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string)
+{
+    nvtxStringHandle_t stringHandle;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    stringHandle = nvtxDomainRegisterStringW(domain, string);
+    return stringHandle;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainCreateA() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message)
+{
+    nvtxDomainHandle_t domainHandle;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    domainHandle = nvtxDomainCreateA(message);
+    return domainHandle;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainCreateW() and
+*  returns its result. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message)
+{
+    nvtxDomainHandle_t domainHandle;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    domainHandle = nvtxDomainCreateW(message);
+    return domainHandle;
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxDomainDestroy(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxDomainDestroy(domain);
+}
+/* Runs nvtxInitOnce(), then forwards the call to nvtxInitialize(). */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved)
+{
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    nvtxInitialize(reserved);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuDeviceA_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name)
+{
+    nvtxNameCuDeviceA_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(device, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuDeviceW_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name)
+{
+    nvtxNameCuDeviceW_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(device, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuContextA_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name)
+{
+    nvtxNameCuContextA_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(context, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuContextW_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name)
+{
+    nvtxNameCuContextW_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(context, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuStreamA_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name)
+{
+    nvtxNameCuStreamA_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(stream, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuStreamW_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name)
+{
+    nvtxNameCuStreamW_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(stream, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuEventA_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name)
+{
+    nvtxNameCuEventA_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(event, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCuEventW_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name)
+{
+    nvtxNameCuEventW_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(event, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCudaDeviceA_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name)
+{
+    nvtxNameCudaDeviceA_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(device, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCudaDeviceW_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name)
+{
+    nvtxNameCudaDeviceW_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(device, name);
+}
+/* Runs nvtxInitOnce(), then calls the nvtxNameCudaStreamA_impl_fnptr stored in
+*  nvtxGlobals if that pointer is non-null; otherwise does nothing. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name)
+{
+    nvtxNameCudaStreamA_fakeimpl_fntype impl;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Read the pointer only after initialization has run */
+    impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
+    if (!impl)
+    {
+        return;
+    }
+    impl(stream, name);
+}
+/* _impl_init entry point for nvtxNameCudaStreamW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameCudaStreamW slot from nvtxGlobals
+ * and forwards (stream, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name)
+{
+    nvtxNameCudaStreamW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(stream, name);
+}
+/* _impl_init entry point for nvtxNameCudaEventA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameCudaEventA slot from nvtxGlobals
+ * and forwards (event, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name)
+{
+    nvtxNameCudaEventA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(event, name);
+}
+/* _impl_init entry point for nvtxNameCudaEventW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameCudaEventW slot from nvtxGlobals
+ * and forwards (event, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name)
+{
+    nvtxNameCudaEventW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(event, name);
+}
+/* _impl_init entry point for nvtxNameClDeviceA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClDeviceA slot from nvtxGlobals
+ * and forwards (device, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name)
+{
+    nvtxNameClDeviceA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(device, name);
+}
+/* _impl_init entry point for nvtxNameClDeviceW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClDeviceW slot from nvtxGlobals
+ * and forwards (device, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name)
+{
+    nvtxNameClDeviceW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(device, name);
+}
+/* _impl_init entry point for nvtxNameClContextA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClContextA slot from nvtxGlobals
+ * and forwards (context, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name)
+{
+    nvtxNameClContextA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(context, name);
+}
+/* _impl_init entry point for nvtxNameClContextW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClContextW slot from nvtxGlobals
+ * and forwards (context, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name)
+{
+    nvtxNameClContextW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(context, name);
+}
+/* _impl_init entry point for nvtxNameClCommandQueueA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClCommandQueueA slot from
+ * nvtxGlobals and forwards (command_queue, name) to it.  A null slot makes
+ * the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name)
+{
+    nvtxNameClCommandQueueA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(command_queue, name);
+}
+/* _impl_init entry point for nvtxNameClCommandQueueW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClCommandQueueW slot from
+ * nvtxGlobals and forwards (command_queue, name) to it.  A null slot makes
+ * the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name)
+{
+    nvtxNameClCommandQueueW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(command_queue, name);
+}
+/* _impl_init entry point for nvtxNameClMemObjectA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClMemObjectA slot from nvtxGlobals
+ * and forwards (memobj, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name)
+{
+    nvtxNameClMemObjectA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(memobj, name);
+}
+/* _impl_init entry point for nvtxNameClMemObjectW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClMemObjectW slot from nvtxGlobals
+ * and forwards (memobj, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name)
+{
+    nvtxNameClMemObjectW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(memobj, name);
+}
+/* _impl_init entry point for nvtxNameClSamplerA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClSamplerA slot from nvtxGlobals
+ * and forwards (sampler, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name)
+{
+    nvtxNameClSamplerA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(sampler, name);
+}
+/* _impl_init entry point for nvtxNameClSamplerW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClSamplerW slot from nvtxGlobals
+ * and forwards (sampler, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name)
+{
+    nvtxNameClSamplerW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(sampler, name);
+}
+/* _impl_init entry point for nvtxNameClProgramA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClProgramA slot from nvtxGlobals
+ * and forwards (program, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name)
+{
+    nvtxNameClProgramA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(program, name);
+}
+/* _impl_init entry point for nvtxNameClProgramW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClProgramW slot from nvtxGlobals
+ * and forwards (program, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name)
+{
+    nvtxNameClProgramW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(program, name);
+}
+/* _impl_init entry point for nvtxNameClEventA.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClEventA slot from nvtxGlobals
+ * and forwards (evnt, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name)
+{
+    nvtxNameClEventA_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(evnt, name);
+}
+/* _impl_init entry point for nvtxNameClEventW.
+ * Runs nvtxInitOnce(), then reads the nvtxNameClEventW slot from nvtxGlobals
+ * and forwards (evnt, name) to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name)
+{
+    nvtxNameClEventW_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(evnt, name);
+}
+/* _impl_init entry point for nvtxDomainSyncUserCreate.
+ * Runs nvtxInitOnce(), then reads the nvtxDomainSyncUserCreate slot from
+ * nvtxGlobals and forwards (domain, attribs) to it.
+ * Returns the forwarded call's result, or NVTX_NULLPTR when the slot is null. */
+NVTX_LINKONCE_DEFINE_FUNCTION nvtx_nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs)
+{
+    nvtxDomainSyncUserCreate_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
+    if (!target) {
+        return NVTX_NULLPTR;
+    }
+    return target(domain, attribs);
+}
+/* _impl_init entry point for nvtxDomainSyncUserDestroy.
+ * Runs nvtxInitOnce(), then reads the nvtxDomainSyncUserDestroy slot from
+ * nvtxGlobals and forwards handle to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtx_nvtxSyncUser_t handle)
+{
+    nvtxDomainSyncUserDestroy_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(handle);
+}
+/* _impl_init entry point for nvtxDomainSyncUserAcquireStart.
+ * Runs nvtxInitOnce(), then reads the nvtxDomainSyncUserAcquireStart slot from
+ * nvtxGlobals and forwards handle to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtx_nvtxSyncUser_t handle)
+{
+    nvtxDomainSyncUserAcquireStart_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(handle);
+}
+/* _impl_init entry point for nvtxDomainSyncUserAcquireFailed.
+ * Runs nvtxInitOnce(), then reads the nvtxDomainSyncUserAcquireFailed slot from
+ * nvtxGlobals and forwards handle to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtx_nvtxSyncUser_t handle)
+{
+    nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(handle);
+}
+/* _impl_init entry point for nvtxDomainSyncUserAcquireSuccess.
+ * Runs nvtxInitOnce(), then reads the nvtxDomainSyncUserAcquireSuccess slot from
+ * nvtxGlobals and forwards handle to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtx_nvtxSyncUser_t handle)
+{
+    nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(handle);
+}
+/* _impl_init entry point for nvtxDomainSyncUserReleasing.
+ * Runs nvtxInitOnce(), then reads the nvtxDomainSyncUserReleasing slot from
+ * nvtxGlobals and forwards handle to it.  A null slot makes the call a no-op. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtx_nvtxSyncUser_t handle)
+{
+    nvtxDomainSyncUserReleasing_fakeimpl_fntype target;
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    /* Slot is read only after nvtxInitOnce() returns (presumably it may repoint it). */
+    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
+    if (!target) {
+        return;
+    }
+    target(handle);
+}
+/* Clears function-pointer slots in nvtxGlobals.
+ *
+ * Each slot handled below is set to NVTX_NULLPTR when it still holds the
+ * address of its matching *_impl_init function, or unconditionally when
+ * forceAllToNoops is non-zero.  With forceAllToNoops == 0, a slot holding any
+ * other value is left untouched.
+ *
+ * The *_impl_init functions visible in this file skip the forwarded call when
+ * their slot is null, so a cleared slot turns the call into a no-op there. */
+NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops);
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops)
+{
+    /* nvtxMark* / nvtxRange* slots */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NVTX_NULLPTR;
+    }
+    /* nvtxNameCategory* / nvtxNameOsThread* slots */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NVTX_NULLPTR;
+    }
+    /* nvtxNameCu* slots */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NVTX_NULLPTR;
+    }
+    /* nvtxNameCl* slots */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NVTX_NULLPTR;
+    }
+    /* nvtxNameCuda* slots */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NVTX_NULLPTR;
+    }
+    /* nvtxDomain* slots and nvtxInitialize */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NVTX_NULLPTR;
+    }
+    /* nvtxDomainSyncUser* slots */
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NVTX_NULLPTR;
+    }
+    if ((NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)) || forceAllToNoops) {
+        NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NVTX_NULLPTR;
+    }
+}

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxLinkOnce.h ADDED Viewed

	@@ -0,0 +1,88 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+#ifndef __NVTX_LINKONCE_H__
+#define __NVTX_LINKONCE_H__
+/* This header defines macros to permit making definitions of global variables
+ * and functions in C/C++ header files which may be included multiple times in
+ * a translation unit or linkage unit.  It allows authoring header-only libraries
+ * which can be used by multiple other header-only libraries (either as the same
+ * copy or multiple copies), and does not require any build changes, such as
+ * adding another .c file, linking a static library, or deploying a dynamic
+ * library.  Globals defined with these macros have the property that they have
+ * the same address, pointing to a single instance, for the entire linkage unit.
+ * It is expected but not guaranteed that each linkage unit will have a separate
+ * instance.
+ *
+ * In some situations it is desirable to declare a variable without initializing
+ * it, refer to it in code or other variables' initializers, and then initialize
+ * it later.  Similarly, functions can be prototyped, have their address taken,
+ * and then have their body defined later.  In such cases, use the FWDDECL macros
+ * when forward-declaring LINKONCE global variables without initializers and
+ * function prototypes, and then use the DEFINE macros when later defining them.
+ * Although in many cases the FWDDECL macro is equivalent to the DEFINE macro,
+ * following this pattern makes code maximally portable.
+ */
+#if defined(_MSC_VER) /* MSVC */
+    #if defined(__cplusplus) /* C++: both macros also force C linkage */
+        #define NVTX_LINKONCE_DEFINE_GLOBAL   extern "C" __declspec(selectany)
+        #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
+    #else /* C */
+        #define NVTX_LINKONCE_DEFINE_GLOBAL   __declspec(selectany)
+        #define NVTX_LINKONCE_DEFINE_FUNCTION __inline
+    #endif /* __cplusplus */
+    #define NVTX_LINKONCE_FWDDECL_GLOBAL      NVTX_LINKONCE_DEFINE_GLOBAL extern /* DEFINE qualifiers plus extern */
+#elif defined(_WIN32) || defined(__CYGWIN__) /* MinGW */
+    #if defined(__cplusplus) /* C++: only the function macro adds extern "C" here */
+        #define NVTX_LINKONCE_DEFINE_GLOBAL   __declspec(selectany)
+        #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
+    #else /* C */
+        #define NVTX_LINKONCE_DEFINE_GLOBAL   __declspec(selectany)
+        #define NVTX_LINKONCE_DEFINE_FUNCTION /* expands to nothing for C on this branch */
+    #endif /* __cplusplus */
+    #define NVTX_LINKONCE_FWDDECL_GLOBAL      extern /* plain extern; selectany is not repeated on this branch */
+#else /* All others: Assume GCC, clang, or compatible */
+    #define NVTX_LINKONCE_WEAK   __attribute__((weak))
+    #define NVTX_LINKONCE_HIDDEN __attribute__((visibility("hidden")))
+    #if defined(__cplusplus) /* C++: functions are hidden + inline with C linkage (no weak attribute) */
+        #define NVTX_LINKONCE_DEFINE_GLOBAL   NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
+        #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_HIDDEN inline
+    #else /* C: functions are hidden + weak instead of inline */
+        #define NVTX_LINKONCE_DEFINE_GLOBAL   NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
+        #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
+    #endif /* __cplusplus */
+    #define NVTX_LINKONCE_FWDDECL_GLOBAL      NVTX_LINKONCE_DEFINE_GLOBAL extern /* DEFINE qualifiers plus extern */
+#endif /* compiler/platform selection */
+#define NVTX_LINKONCE_FWDDECL_FUNCTION        NVTX_LINKONCE_DEFINE_FUNCTION /* same qualifiers as DEFINE on every branch */
+#endif /* __NVTX_LINKONCE_H__ */

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/include/nvtx3/nvtxDetail/nvtxTypes.h ADDED Viewed

	@@ -0,0 +1,318 @@

+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
+ */
+/* This header defines types which are used by the internal implementation
+*  of NVTX and callback subscribers.  API clients do not use these types,
+*  so they are defined here instead of in nvToolsExt.h to clarify they are
+*  not part of the NVTX client API. */
+#ifndef NVTX_IMPL_GUARD
+#error Never include this file directly -- it is automatically included by nvToolsExt.h.
+#endif
+#if defined(NVTX_AS_SYSTEM_HEADER)
+#if defined(__clang__)
+#pragma clang system_header
+#elif defined(__GNUC__) || defined(__NVCOMPILER)
+#pragma GCC system_header
+#elif defined(_MSC_VER)
+#pragma system_header
+#endif
+#endif
+/* ------ Dependency-free types binary-compatible with real types ------- */
+/* In order to avoid having the NVTX core API headers depend on non-NVTX
+*  headers like cuda.h, NVTX defines binary-compatible types to use for
+*  safely making the initialization versions of all NVTX functions without
+*  needing to have definitions for the real types. */
+typedef int   nvtx_CUdevice; /* the only integer stand-in; the handle types below are void* */
+typedef void* nvtx_CUcontext;
+typedef void* nvtx_CUstream;
+typedef void* nvtx_CUevent;
+typedef void* nvtx_cudaStream_t;
+typedef void* nvtx_cudaEvent_t;
+typedef void* nvtx_cl_platform_id;
+typedef void* nvtx_cl_device_id;
+typedef void* nvtx_cl_context;
+typedef void* nvtx_cl_command_queue;
+typedef void* nvtx_cl_mem;
+typedef void* nvtx_cl_program;
+typedef void* nvtx_cl_kernel;
+typedef void* nvtx_cl_event;
+typedef void* nvtx_cl_sampler;
+typedef void* nvtx_nvtxSyncUser_t;
+typedef void nvtx_nvtxSyncUserAttributes_t; /* alias of void itself, so it is only usable behind a pointer */
+/* --------- Types for function pointers (with fake API types) ---------- */
+typedef void (NVTX_API * nvtxMarkEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
+typedef void (NVTX_API * nvtxMarkA_impl_fntype)(const char* message);
+typedef void (NVTX_API * nvtxMarkW_impl_fntype)(const wchar_t* message);
+typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
+typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartA_impl_fntype)(const char* message);
+typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartW_impl_fntype)(const wchar_t* message);
+typedef void (NVTX_API * nvtxRangeEnd_impl_fntype)(nvtxRangeId_t id);
+typedef int (NVTX_API * nvtxRangePushEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
+typedef int (NVTX_API * nvtxRangePushA_impl_fntype)(const char* message);
+typedef int (NVTX_API * nvtxRangePushW_impl_fntype)(const wchar_t* message);
+typedef int (NVTX_API * nvtxRangePop_impl_fntype)(void);
+typedef void (NVTX_API * nvtxNameCategoryA_impl_fntype)(uint32_t category, const char* name);
+typedef void (NVTX_API * nvtxNameCategoryW_impl_fntype)(uint32_t category, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameOsThreadA_impl_fntype)(uint32_t threadId, const char* name);
+typedef void (NVTX_API * nvtxNameOsThreadW_impl_fntype)(uint32_t threadId, const wchar_t* name);
+/* Real impl types are defined in nvtxImplCuda_v3.h, where CUDA headers are included */
+typedef void (NVTX_API * nvtxNameCuDeviceA_fakeimpl_fntype)(nvtx_CUdevice device, const char* name);
+typedef void (NVTX_API * nvtxNameCuDeviceW_fakeimpl_fntype)(nvtx_CUdevice device, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameCuContextA_fakeimpl_fntype)(nvtx_CUcontext context, const char* name);
+typedef void (NVTX_API * nvtxNameCuContextW_fakeimpl_fntype)(nvtx_CUcontext context, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameCuStreamA_fakeimpl_fntype)(nvtx_CUstream stream, const char* name);
+typedef void (NVTX_API * nvtxNameCuStreamW_fakeimpl_fntype)(nvtx_CUstream stream, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameCuEventA_fakeimpl_fntype)(nvtx_CUevent event, const char* name);
+typedef void (NVTX_API * nvtxNameCuEventW_fakeimpl_fntype)(nvtx_CUevent event, const wchar_t* name);
+/* Real impl types are defined in nvtxImplOpenCL_v3.h, where OPENCL headers are included */
+typedef void (NVTX_API * nvtxNameClDeviceA_fakeimpl_fntype)(nvtx_cl_device_id device, const char* name);
+typedef void (NVTX_API * nvtxNameClDeviceW_fakeimpl_fntype)(nvtx_cl_device_id device, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClContextA_fakeimpl_fntype)(nvtx_cl_context context, const char* name);
+typedef void (NVTX_API * nvtxNameClContextW_fakeimpl_fntype)(nvtx_cl_context context, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClCommandQueueA_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const char* name);
+typedef void (NVTX_API * nvtxNameClCommandQueueW_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClMemObjectA_fakeimpl_fntype)(nvtx_cl_mem memobj, const char* name);
+typedef void (NVTX_API * nvtxNameClMemObjectW_fakeimpl_fntype)(nvtx_cl_mem memobj, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClSamplerA_fakeimpl_fntype)(nvtx_cl_sampler sampler, const char* name);
+typedef void (NVTX_API * nvtxNameClSamplerW_fakeimpl_fntype)(nvtx_cl_sampler sampler, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClProgramA_fakeimpl_fntype)(nvtx_cl_program program, const char* name);
+typedef void (NVTX_API * nvtxNameClProgramW_fakeimpl_fntype)(nvtx_cl_program program, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, const char* name);
+typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name);
+/* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */
+typedef void (NVTX_API * nvtxNameCudaDeviceA_fakeimpl_fntype)(int device, const char* name);
+typedef void (NVTX_API * nvtxNameCudaDeviceW_fakeimpl_fntype)(int device, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name);
+typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name);
+typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name);
+typedef void (NVTX_API * nvtxNameCudaEventW_fakeimpl_fntype)(nvtx_cudaEvent_t event, const wchar_t* name);
+typedef void (NVTX_API * nvtxDomainMarkEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+typedef nvtxRangeId_t (NVTX_API * nvtxDomainRangeStartEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+typedef void (NVTX_API * nvtxDomainRangeEnd_impl_fntype)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
+typedef int (NVTX_API * nvtxDomainRangePushEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
+typedef int (NVTX_API * nvtxDomainRangePop_impl_fntype)(nvtxDomainHandle_t domain);
+typedef nvtxResourceHandle_t (NVTX_API * nvtxDomainResourceCreate_impl_fntype)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
+typedef void (NVTX_API * nvtxDomainResourceDestroy_impl_fntype)(nvtxResourceHandle_t resource);
+typedef void (NVTX_API * nvtxDomainNameCategoryA_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
+typedef void (NVTX_API * nvtxDomainNameCategoryW_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
+typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringA_impl_fntype)(nvtxDomainHandle_t domain, const char* string);
+typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringW_impl_fntype)(nvtxDomainHandle_t domain, const wchar_t* string);
+typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateA_impl_fntype)(const char* message);
+typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wchar_t* message);
+typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain);
+typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved);
+typedef nvtx_nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_fakeimpl_fntype)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs);
+typedef void (NVTX_API * nvtxDomainSyncUserDestroy_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
+typedef void (NVTX_API * nvtxDomainSyncUserReleasing_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle);
+/* ---------------- Types for callback subscription --------------------- */
+typedef const void *(NVTX_API * NvtxGetExportTableFunc_t)(uint32_t exportTableId);
+typedef int (NVTX_API * NvtxInitializeInjectionNvtxFunc_t)(NvtxGetExportTableFunc_t exportTable);
+typedef enum NvtxCallbackModule /* Module selector; this is the type of the callback_module argument of NvtxExportTableCallbacks::GetModuleFunctionTable */
+{
+    NVTX_CB_MODULE_INVALID                 = 0,
+    NVTX_CB_MODULE_CORE                    = 1,
+    NVTX_CB_MODULE_CUDA                    = 2,
+    NVTX_CB_MODULE_OPENCL                  = 3,
+    NVTX_CB_MODULE_CUDART                  = 4,
+    NVTX_CB_MODULE_CORE2                   = 5,
+    NVTX_CB_MODULE_SYNC                    = 6,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CB_MODULE_SIZE, /* no explicit value: evaluates to one more than the last module ID above */
+    NVTX_CB_MODULE_FORCE_INT               = 0x7fffffff /* NOTE(review): presumably forces the enum to span a full 32-bit int -- confirm */
+} NvtxCallbackModule;
+typedef enum NvtxCallbackIdCore
+{
+    NVTX_CBID_CORE_INVALID                 =  0,
+    NVTX_CBID_CORE_MarkEx                  =  1,
+    NVTX_CBID_CORE_MarkA                   =  2,
+    NVTX_CBID_CORE_MarkW                   =  3,
+    NVTX_CBID_CORE_RangeStartEx            =  4,
+    NVTX_CBID_CORE_RangeStartA             =  5,
+    NVTX_CBID_CORE_RangeStartW             =  6,
+    NVTX_CBID_CORE_RangeEnd                =  7,
+    NVTX_CBID_CORE_RangePushEx             =  8,
+    NVTX_CBID_CORE_RangePushA              =  9,
+    NVTX_CBID_CORE_RangePushW              = 10,
+    NVTX_CBID_CORE_RangePop                = 11,
+    NVTX_CBID_CORE_NameCategoryA           = 12,
+    NVTX_CBID_CORE_NameCategoryW           = 13,
+    NVTX_CBID_CORE_NameOsThreadA           = 14,
+    NVTX_CBID_CORE_NameOsThreadW           = 15,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CBID_CORE_SIZE,
+    NVTX_CBID_CORE_FORCE_INT = 0x7fffffff
+} NvtxCallbackIdCore;
+typedef enum NvtxCallbackIdCore2
+{
+    NVTX_CBID_CORE2_INVALID                 = 0,
+    NVTX_CBID_CORE2_DomainMarkEx            = 1,
+    NVTX_CBID_CORE2_DomainRangeStartEx      = 2,
+    NVTX_CBID_CORE2_DomainRangeEnd          = 3,
+    NVTX_CBID_CORE2_DomainRangePushEx       = 4,
+    NVTX_CBID_CORE2_DomainRangePop          = 5,
+    NVTX_CBID_CORE2_DomainResourceCreate    = 6,
+    NVTX_CBID_CORE2_DomainResourceDestroy   = 7,
+    NVTX_CBID_CORE2_DomainNameCategoryA     = 8,
+    NVTX_CBID_CORE2_DomainNameCategoryW     = 9,
+    NVTX_CBID_CORE2_DomainRegisterStringA   = 10,
+    NVTX_CBID_CORE2_DomainRegisterStringW   = 11,
+    NVTX_CBID_CORE2_DomainCreateA           = 12,
+    NVTX_CBID_CORE2_DomainCreateW           = 13,
+    NVTX_CBID_CORE2_DomainDestroy           = 14,
+    NVTX_CBID_CORE2_Initialize              = 15,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CBID_CORE2_SIZE,
+    NVTX_CBID_CORE2_FORCE_INT               = 0x7fffffff
+} NvtxCallbackIdCore2;
+typedef enum NvtxCallbackIdCuda
+{
+    NVTX_CBID_CUDA_INVALID                 =  0,
+    NVTX_CBID_CUDA_NameCuDeviceA           =  1,
+    NVTX_CBID_CUDA_NameCuDeviceW           =  2,
+    NVTX_CBID_CUDA_NameCuContextA          =  3,
+    NVTX_CBID_CUDA_NameCuContextW          =  4,
+    NVTX_CBID_CUDA_NameCuStreamA           =  5,
+    NVTX_CBID_CUDA_NameCuStreamW           =  6,
+    NVTX_CBID_CUDA_NameCuEventA            =  7,
+    NVTX_CBID_CUDA_NameCuEventW            =  8,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CBID_CUDA_SIZE,
+    NVTX_CBID_CUDA_FORCE_INT               = 0x7fffffff
+} NvtxCallbackIdCuda;
+typedef enum NvtxCallbackIdCudaRt
+{
+    NVTX_CBID_CUDART_INVALID               =  0,
+    NVTX_CBID_CUDART_NameCudaDeviceA       =  1,
+    NVTX_CBID_CUDART_NameCudaDeviceW       =  2,
+    NVTX_CBID_CUDART_NameCudaStreamA       =  3,
+    NVTX_CBID_CUDART_NameCudaStreamW       =  4,
+    NVTX_CBID_CUDART_NameCudaEventA        =  5,
+    NVTX_CBID_CUDART_NameCudaEventW        =  6,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CBID_CUDART_SIZE,
+    NVTX_CBID_CUDART_FORCE_INT             = 0x7fffffff
+} NvtxCallbackIdCudaRt;
+typedef enum NvtxCallbackIdOpenCL
+{
+    NVTX_CBID_OPENCL_INVALID               =  0,
+    NVTX_CBID_OPENCL_NameClDeviceA         =  1,
+    NVTX_CBID_OPENCL_NameClDeviceW         =  2,
+    NVTX_CBID_OPENCL_NameClContextA        =  3,
+    NVTX_CBID_OPENCL_NameClContextW        =  4,
+    NVTX_CBID_OPENCL_NameClCommandQueueA   =  5,
+    NVTX_CBID_OPENCL_NameClCommandQueueW   =  6,
+    NVTX_CBID_OPENCL_NameClMemObjectA      =  7,
+    NVTX_CBID_OPENCL_NameClMemObjectW      =  8,
+    NVTX_CBID_OPENCL_NameClSamplerA        =  9,
+    NVTX_CBID_OPENCL_NameClSamplerW        = 10,
+    NVTX_CBID_OPENCL_NameClProgramA        = 11,
+    NVTX_CBID_OPENCL_NameClProgramW        = 12,
+    NVTX_CBID_OPENCL_NameClEventA          = 13,
+    NVTX_CBID_OPENCL_NameClEventW          = 14,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CBID_OPENCL_SIZE,
+    NVTX_CBID_OPENCL_FORCE_INT             = 0x7fffffff
+} NvtxCallbackIdOpenCL;
+typedef enum NvtxCallbackIdSync
+{
+    NVTX_CBID_SYNC_INVALID                      = 0,
+    NVTX_CBID_SYNC_DomainSyncUserCreate         = 1,
+    NVTX_CBID_SYNC_DomainSyncUserDestroy        = 2,
+    NVTX_CBID_SYNC_DomainSyncUserAcquireStart   = 3,
+    NVTX_CBID_SYNC_DomainSyncUserAcquireFailed  = 4,
+    NVTX_CBID_SYNC_DomainSyncUserAcquireSuccess = 5,
+    NVTX_CBID_SYNC_DomainSyncUserReleasing      = 6,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_CBID_SYNC_SIZE,
+    NVTX_CBID_SYNC_FORCE_INT                    = 0x7fffffff
+} NvtxCallbackIdSync;
+/* IDs for NVTX Export Tables.
+*  NOTE(review): presumably these are the values accepted as exportTableId by
+*  NvtxGetExportTableFunc_t, with NVTX_ETID_CALLBACKS selecting an
+*  NvtxExportTableCallbacks and NVTX_ETID_VERSIONINFO selecting an
+*  NvtxExportTableVersionInfo -- the pairing is by name only, confirm. */
+typedef enum NvtxExportTableID
+{
+    NVTX_ETID_INVALID                      = 0,
+    NVTX_ETID_CALLBACKS                    = 1,
+    NVTX_ETID_RESERVED0                    = 2,
+    NVTX_ETID_VERSIONINFO                  = 3,
+    /* --- New constants must only be added directly above this line --- */
+    NVTX_ETID_SIZE, /* no explicit value: evaluates to one more than the last table ID above */
+    NVTX_ETID_FORCE_INT                    = 0x7fffffff
+} NvtxExportTableID;
+typedef void (* NvtxFunctionPointer)(void); /* generic uncallable function pointer, must be cast to appropriate function type */
+typedef NvtxFunctionPointer** NvtxFunctionTable; /* double pointer because array(1) of pointers(2) to function pointers */
+typedef struct NvtxExportTableCallbacks
+{
+    size_t struct_size; /* NOTE(review): presumably sizeof(NvtxExportTableCallbacks), mirroring NvtxExportTableVersionInfo -- confirm */
+    /* Takes the module to query and two out-parameters: out_table receives an
+    *  NvtxFunctionTable (an array of pointers to function pointers) and
+    *  out_size receives an unsigned count.  The meaning of the int return
+    *  value is not documented in this header. */
+    int (NVTX_API *GetModuleFunctionTable)(
+        NvtxCallbackModule callback_module,
+        NvtxFunctionTable* out_table,
+        unsigned int* out_size);
+} NvtxExportTableCallbacks;
+typedef struct NvtxExportTableVersionInfo
+{
+    /* sizeof(NvtxExportTableVersionInfo) */
+    size_t struct_size;
+    /* The API version comes from the NVTX library linked to the app.  The
+    *  injection library can use this info to make some assumptions. */
+    uint32_t version;
+    /* Reserved for alignment, do not use */
+    uint32_t reserved0;
+    /* This must be set by tools when attaching to provide applications
+    *  the ability to, in emergency situations, detect problematic tools
+    *  versions and modify the NVTX source to prevent attaching anything
+    *  that causes trouble in the app.  Currently, this value is ignored. */
+    void (NVTX_API *SetInjectionNvtxVersion)(
+        uint32_t version);
+} NvtxExportTableVersionInfo;

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libcufile_rdma.so.1 ADDED Viewed

Binary file (43.3 kB). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cu13/lib/libnvtx3interop.so.1 ADDED Viewed

Binary file (40.2 kB). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (205 Bytes). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__init__.py ADDED Viewed

File without changes

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (213 Bytes). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h ADDED Viewed

	@@ -0,0 +1,740 @@

+/*
+ * Copyright 1993-2023 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+/**
+ * @file cufile.h
+ * @brief  cuFile C APIs
+ *
+ * This file contains all the C APIs to perform GPUDirect Storage supported IO operations
+ */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/// @cond DOXYGEN_SKIP_MACRO
+#ifndef __CUFILE_H_
+#define __CUFILE_H_
+#include <stdlib.h>
+#include <stdbool.h>
+#include <cuda.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#define CUFILEOP_BASE_ERR 5000
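+// Status table: each CUFILE_OP(code, name, description) entry below is expanded by whatever definition of CUFILE_OP is active at the point of use.
+// Note: data path errors are captured via standard error codes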
+#define CUFILEOP_STATUS_ENTRIES \
+	CUFILE_OP(0,                      CU_FILE_SUCCESS, cufile success) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 1,  CU_FILE_DRIVER_NOT_INITIALIZED, nvidia-fs driver is not loaded. Set allow_compat_mode to true in cufile.json file to enable compatible mode) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 2,  CU_FILE_DRIVER_INVALID_PROPS, invalid property) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 3,  CU_FILE_DRIVER_UNSUPPORTED_LIMIT, property range error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 4,  CU_FILE_DRIVER_VERSION_MISMATCH, nvidia-fs driver version mismatch) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 5,  CU_FILE_DRIVER_VERSION_READ_ERROR, nvidia-fs driver version read error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 6,  CU_FILE_DRIVER_CLOSING, driver shutdown in progress) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 7,  CU_FILE_PLATFORM_NOT_SUPPORTED, GPUDirect Storage not supported on current platform) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 8,  CU_FILE_IO_NOT_SUPPORTED, GPUDirect Storage not supported on current file) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 9,  CU_FILE_DEVICE_NOT_SUPPORTED, GPUDirect Storage not supported on current GPU) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 10, CU_FILE_NVFS_DRIVER_ERROR, nvidia-fs driver ioctl error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 11, CU_FILE_CUDA_DRIVER_ERROR, CUDA Driver API error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 12, CU_FILE_CUDA_POINTER_INVALID, invalid device pointer) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 13, CU_FILE_CUDA_MEMORY_TYPE_INVALID, invalid pointer memory type) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 14, CU_FILE_CUDA_POINTER_RANGE_ERROR, pointer range exceeds allocated address range) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 15, CU_FILE_CUDA_CONTEXT_MISMATCH, cuda context mismatch) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 16, CU_FILE_INVALID_MAPPING_SIZE, access beyond maximum pinned size) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 17, CU_FILE_INVALID_MAPPING_RANGE, access beyond mapped size) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 18, CU_FILE_INVALID_FILE_TYPE, unsupported file type) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 19, CU_FILE_INVALID_FILE_OPEN_FLAG, unsupported file open flags) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 20, CU_FILE_DIO_NOT_SET, fd direct IO not set) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 22, CU_FILE_INVALID_VALUE, invalid arguments) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 23, CU_FILE_MEMORY_ALREADY_REGISTERED, device pointer already registered) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 24, CU_FILE_MEMORY_NOT_REGISTERED, device pointer lookup failure) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 25, CU_FILE_PERMISSION_DENIED, driver or file access error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 26, CU_FILE_DRIVER_ALREADY_OPEN, driver is already open) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 27, CU_FILE_HANDLE_NOT_REGISTERED, file descriptor is not registered) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 28, CU_FILE_HANDLE_ALREADY_REGISTERED, file descriptor is already registered) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 29, CU_FILE_DEVICE_NOT_FOUND, GPU device not found) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 30, CU_FILE_INTERNAL_ERROR, internal error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 31, CU_FILE_GETNEWFD_FAILED, failed to obtain new file descriptor) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 33, CU_FILE_NVFS_SETUP_ERROR, NVFS driver initialization error) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 34, CU_FILE_IO_DISABLED, GPUDirect Storage disabled by config on current file)\
+	CUFILE_OP(CUFILEOP_BASE_ERR + 35, CU_FILE_BATCH_SUBMIT_FAILED, failed to submit batch operation)\
+	CUFILE_OP(CUFILEOP_BASE_ERR + 36, CU_FILE_GPU_MEMORY_PINNING_FAILED, failed to allocate pinned GPU Memory) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 37, CU_FILE_BATCH_FULL, queue full for batch operation) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 38, CU_FILE_ASYNC_NOT_SUPPORTED, cuFile stream operation not supported) \
+	CUFILE_OP(CUFILEOP_BASE_ERR + 39, CU_FILE_IO_MAX_ERROR, GPUDirect Storage Max Error)
+/**
+ * @brief cufileop status enum
+ *
+ * The enumerators are not written out here: they are generated from
+ * CUFILEOP_STATUS_ENTRIES by temporarily defining CUFILE_OP so that each
+ * entry expands to "name = code,".
+ *
+ * @note on success the error code is set to @ref CU_FILE_SUCCESS.
+ * @note The error code can be inspected using @ref IS_CUFILE_ERR and @ref CUFILE_ERRSTR.
+ * @note If the error code is set to @ref CU_FILE_CUDA_DRIVER_ERROR, the cuda error can be inspected using @ref IS_CUDA_ERR and @ref CU_FILE_CUDA_ERR.
+ * @note Data path errors are captured via standard error codes
+ */
+typedef enum CUfileOpError {
+        /// @cond DOXYGEN_SKIP_MACRO
+	#define CUFILE_OP(code, name, string) name = code,
+	CUFILEOP_STATUS_ENTRIES
+	#undef CUFILE_OP
+        ///@endcond
+} CUfileOpError;
+/// @endcond
+/**
+ * @brief Return the description string for a cufile status code
+ *
+ * The switch cases are generated from CUFILEOP_STATUS_ENTRIES: CUFILE_OP is
+ * temporarily defined so that each entry becomes a case label returning the
+ * entry's third argument, stringified.
+ *
+ * @param status status code to describe
+ *
+ * @return the description text of the matching status entry, or
+ *         "unknown cufile error" if status matches no entry
+ */
+static inline const char *cufileop_status_error(CUfileOpError status)
+{
+	switch (status) {
+	/// @cond DOXYGEN_SKIP_MACRO
+	#define CUFILE_OP(code, name, string) \
+	case name: return #string;
+	CUFILEOP_STATUS_ENTRIES
+	#undef CUFILE_OP
+	///@endcond
+	default:return "unknown cufile error";
+	}
+}
+/**
+ * @brief cufile error status: a cufile status code paired with a CUDA driver error code
+ *
+ * @note @ref IS_CUDA_ERR tests err against @ref CU_FILE_CUDA_DRIVER_ERROR, and
+ *       @ref CU_FILE_CUDA_ERR reads cu_err.
+ */
+typedef struct CUfileError {
+	CUfileOpError err; // cufile error
+	CUresult cu_err; // cuda driver error
+}CUfileError_t;
+/**
+ * @brief  error macros to inspect error status of type @ref CUfileOpError
+ *
+ * - IS_CUFILE_ERR(err): true when the absolute value of err is greater than CUFILEOP_BASE_ERR.
+ * - CUFILE_ERRSTR(err): description string returned by @ref cufileop_status_error for the absolute value of err.
+ * - IS_CUDA_ERR(status): true when status.err equals @ref CU_FILE_CUDA_DRIVER_ERROR.
+ * - CU_FILE_CUDA_ERR(status): the cu_err member of status.
+ *
+ * @note IS_CUFILE_ERR and CUFILE_ERRSTR pass err through abs(), so a status code is treated the same whether it is given as a positive or a negative value.
+ */
+#define IS_CUFILE_ERR(err) \
+	(abs((err)) > CUFILEOP_BASE_ERR)
+#define CUFILE_ERRSTR(err) \
+	cufileop_status_error((CUfileOpError)abs((err)))
+#define IS_CUDA_ERR(status) \
+	((status).err == CU_FILE_CUDA_DRIVER_ERROR)
+#define CU_FILE_CUDA_ERR(status) ((status).cu_err)
+/* driver properties */
+typedef enum CUfileDriverStatusFlags { /* NOTE(review): values look like flag indices for CUfileDrvProps nvfs.dstatusflags -- confirm */
+        CU_FILE_LUSTRE_SUPPORTED = 0, /*!< Support for DDN LUSTRE */
+        CU_FILE_WEKAFS_SUPPORTED = 1, /*!< Support for WEKAFS */
+        CU_FILE_NFS_SUPPORTED = 2, /*!< Support for NFS */
+        CU_FILE_GPFS_SUPPORTED = 3, /*!< Support for GPFS */
+        CU_FILE_NVME_SUPPORTED = 4, /*!< Support for NVMe */
+        CU_FILE_NVMEOF_SUPPORTED = 5, /*!< Support for NVMeOF */
+        CU_FILE_SCSI_SUPPORTED = 6, /*!< Support for SCSI */
+	CU_FILE_SCALEFLUX_CSD_SUPPORTED = 7, /*!< Support for Scaleflux CSD */
+	CU_FILE_NVMESH_SUPPORTED = 8, /*!< Support for NVMesh Block Dev */
+	CU_FILE_BEEGFS_SUPPORTED = 9, /*!< Support for BeeGFS */
+    // 10 is reserved for YRCloudFile, which is why the next value skips to 11
+        CU_FILE_NVME_P2P_SUPPORTED = 11,   /*!< Support for NVMe using PCI P2PDMA */
+}CUfileDriverStatusFlags_t;
+typedef enum CUfileDriverControlFlags {
+        CU_FILE_USE_POLL_MODE = 0 , /*!< use POLL mode. properties.use_poll_mode*/
+        CU_FILE_ALLOW_COMPAT_MODE = 1/*!< allow COMPATIBILITY mode. properties.allow_compat_mode*/
+}CUfileDriverControlFlags_t;
+typedef enum CUfileFeatureFlags {
+        CU_FILE_DYN_ROUTING_SUPPORTED = 0, /*!< Support for Dynamic routing to handle devices across the PCIe bridges */
+        CU_FILE_BATCH_IO_SUPPORTED = 1, /*!<  Unsupported */
+        CU_FILE_STREAMS_SUPPORTED = 2, /*!<  Unsupported */
+        CU_FILE_PARALLEL_IO_SUPPORTED = 3 /*!<  Unsupported */
+}CUfileFeatureFlags_t;
+typedef struct CUfileDrvProps {
+        struct {
+                unsigned int major_version;
+                unsigned int minor_version;
+                size_t poll_thresh_size;
+                size_t max_direct_io_size;
+                unsigned int dstatusflags;
+                unsigned int dcontrolflags;
+        } nvfs;
+        unsigned int fflags;
+        unsigned int max_device_cache_size;
+	unsigned int per_buffer_cache_size;
+        unsigned int max_device_pinned_mem_size;
+        unsigned int max_batch_io_size;
+        unsigned int max_batch_io_timeout_msecs;
+}CUfileDrvProps_t;
+typedef struct sockaddr sockaddr_t;
+typedef struct cufileRDMAInfo
+{
+        int version;
+        int desc_len;
+        const char *desc_str;
+}cufileRDMAInfo_t;
+#define CU_FILE_RDMA_REGISTER 1
+#define CU_FILE_RDMA_RELAXED_ORDERING (1<<1)
+typedef struct CUfileFSOps {
+      /* NULL means discover using fstat */
+      const char* (*fs_type) (void *handle);
+      /* list of host addresses to use,  NULL means no restriction */
+      int (*getRDMADeviceList)(void *handle, sockaddr_t **hostaddrs);
+      /* -1 no pref */
+      int (*getRDMADevicePriority)(void *handle, char*, size_t,
+                                loff_t, sockaddr_t* hostaddr);
+      /* NULL means try VFS */
+      ssize_t (*read) (void *handle, char*, size_t, loff_t, cufileRDMAInfo_t*);
+      ssize_t (*write) (void *handle, const char *, size_t, loff_t , cufileRDMAInfo_t*);
+}CUfileFSOps_t;
+/* File Handle */
+enum CUfileFileHandleType {
+	CU_FILE_HANDLE_TYPE_OPAQUE_FD = 1,   /*!< Linux based fd */
+	CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 = 2, /*!< Windows based handle (unsupported) */
+        CU_FILE_HANDLE_TYPE_USERSPACE_FS = 3,  /*!< Userspace based FS */
+};
+typedef struct CUfileDescr_t {
+	enum CUfileFileHandleType type; /* type of file being registered */
+	union {
+		int fd; 		/* Linux   */
+		void *handle; 		/* Windows */
+	} handle;
+        const CUfileFSOps_t *fs_ops;     /* file system operation table */
+}CUfileDescr_t;
+/**
+ * @brief File handle type
+ *
+ */
+typedef void* CUfileHandle_t;
+#pragma GCC visibility push(default)
+/**
+ * @brief cuFileHandleRegister is required, and performs extra checking that is memoized to provide increased performance on later cuFile operations.
+ *
+ * @param fh @ref CUfileHandle_t opaque file handle for IO operations
+ * @param descr @ref CUfileDescr_t  file descriptor (OS agnostic)
+ *
+ * @return      CU_FILE_SUCCESS on successful completion. fh will be updated for use in @ref cuFileRead, @ref cuFileWrite, @ref cuFileHandleDeregister
+ * @return      CU_FILE_DRIVER_NOT_INITIALIZED on failure to load driver
+ * @return      CU_FILE_IO_NOT_SUPPORTED -  if filesystem is not supported
+ * @return      CU_FILE_INVALID_VALUE if null or bad api arguments
+ * @return      CU_FILE_INVALID_FILE_OPEN_FLAG if file is opened with unsupported modes like no O_DIRECT
+ * @return      CU_FILE_INVALID_FILE_TYPE if filepath is not valid or is not a regular file
+ * @return      CU_FILE_HANDLE_ALREADY_REGISTERED if file handle/descriptor is already registered
+ *
+ * <b>Description</b>
+ * cuFileHandleRegister registers the open file descriptor for use with cuFile IO operations.
+ *
+ * This API will ensure that the file's descriptor is checked for GPUDirect Storage support and returns a valid file handle on CU_FILE_SUCCESS.
+ *
+ * @note the file needs to be opened in O_DIRECT mode to support GPUDirect Storage.
+ *
+ * @see cuFileRead
+ * @see cuFileWrite
+ * @see cuFileHandleDeregister
+ *
+ */
+CUfileError_t cuFileHandleRegister(CUfileHandle_t *fh, CUfileDescr_t *descr);
+/**
+ * @brief releases a registered filehandle from cuFile
+ *
+ * @param fh @ref CUfileHandle_t file handle
+ *
+ * @return void
+ *
+ * @see cuFileHandleRegister
+ */
+void cuFileHandleDeregister(CUfileHandle_t fh);
+/**
+ * @brief register an existing cudaMalloced memory with cuFile to pin for GPUDirect Storage access or
+ * register host allocated memory with cuFile.
+ *
+ * @param bufPtr_base buffer pointer allocated
+ * @param length  size of memory region from the above specified bufPtr
+ * @param flags   CU_FILE_RDMA_REGISTER
+ *
+ * @return  CU_FILE_SUCCESS on success
+ * @return  CU_FILE_NVFS_DRIVER_ERROR
+ * @return  CU_FILE_INVALID_VALUE
+ * @return  CU_FILE_CUDA_ERROR for unsupported memory type
+ * @return  CU_FILE_MEMORY_ALREADY_REGISTERED on error
+ * @return  CU_FILE_GPU_MEMORY_PINNING_FAILED if not enough pinned memory is available
+ * @note This memory will be used to perform GPU direct DMA from the supported storage.
+ * @warning This API is intended for usecases where the memory is used as streaming buffer that is reused across multiple cuFile IO operations before calling @ref cuFileBufDeregister
+ *
+ * @see cuFileBufDeregister
+ * @see cuFileRead
+ * @see cuFileWrite
+ */
+CUfileError_t cuFileBufRegister(const void *bufPtr_base, size_t length, int flags);
+/**
+ * @brief  deregister an already registered device or host memory from cuFile
+ *
+ * @param bufPtr_base  buffer pointer to deregister
+ *
+ * @return  CU_FILE_SUCCESS on success
+ * @return  CU_FILE_INVALID_VALUE on invalid memory pointer or unregistered memory pointer
+ *
+ * @see cuFileBufRegister
+ * @see cuFileRead
+ * @see cuFileWrite
+ */
+CUfileError_t cuFileBufDeregister(const void *bufPtr_base);
+/**
+ * @brief read data from a registered file handle to a specified device or host memory
+ *
+ * @param fh @ref CUfileHandle_t opaque file handle
+ * @param bufPtr_base  base address of buffer in device or host memory
+ * @param size    size bytes to read
+ * @param file_offset  file-offset from beginning of the file
+ * @param bufPtr_offset  offset relative to the bufPtr_base pointer to read into.
+ *
+ * @return  size of bytes successfully read
+ * @return  -1 on error, in which case errno is set to indicate filesystem errors.
+ * @return  all other errors will return a negative integer value of @ref CUfileOpError enum value.
+ *
+ * @note  If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
+ * @note  This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
+ *        for cases where the BAR1 memory size is smaller than the size of the allocated memory.
+ *
+ * @see cuFileBufRegister
+ * @see cuFileHandleRegister
+ * @see cuFileWrite
+ */
+ssize_t cuFileRead(CUfileHandle_t fh, void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
+/**
+ * @brief  write data from a specified device or host memory to a registered file handle
+ *
+ * @param fh @ref CUfileHandle_t opaque file handle
+ * @param bufPtr_base  base address of buffer in device or host memory
+ * @param size    size bytes to write
+ * @param file_offset  file-offset from beginning of the file
+ * @param bufPtr_offset  offset relative to the bufPtr_base pointer to write from.
+ *
+ * @return  size of bytes successfully written
+ * @return  -1 on error, in which case errno is set to indicate filesystem errors.
+ * @return  all other errors will return a negative integer value of @ref CUfileOpError enum value.
+ *
+ * @note  If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
+ * @note  This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
+ *        for cases where the BAR1 memory size is smaller than the size of the allocated memory.
+ *
+ * @see cuFileBufRegister
+ * @see cuFileHandleRegister
+ * @see cuFileRead
+ */
+ssize_t cuFileWrite(CUfileHandle_t fh, const void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
+// CUFile Driver APIs
+/**
+ * @brief
+ * Initialize the cuFile library and open the nvidia-fs driver
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH on driver version mismatch error
+ *
+ * @see cuFileDriverClose
+ */
+CUfileError_t cuFileDriverOpen(void);
+/*
+ * The first declaration below introduces the un-suffixed cuFileDriverClose
+ * symbol. The macro that follows then maps the name cuFileDriverClose onto
+ * cuFileDriverClose_v2 for all code after this point, so the documented
+ * declaration further down is expanded by the preprocessor into a
+ * declaration of cuFileDriverClose_v2.
+ * NOTE(review): presumably the un-suffixed symbol is kept only for binary
+ * compatibility with older applications - confirm.
+ */
+CUfileError_t cuFileDriverClose(void);
+#define cuFileDriverClose cuFileDriverClose_v2
+/**
+ * @brief
+ * reset the cuFile library and release the nvidia-fs driver
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_CLOSING if there are any active IO operations using @ref cuFileRead or @ref cuFileWrite
+ *
+ * @see cuFileDriverOpen
+ */
+CUfileError_t cuFileDriverClose(void);
+/**
+ * @brief
+ * returns use count of cufile drivers at that moment by the process.
+ */
+long cuFileUseCount(void);
+/**
+ * @brief
+ * Gets the Driver session properties
+ *
+ * @return CU_FILE_SUCCESS on success
+ *
+ * @see cuFileDriverSetPollMode
+ * @see cuFileDriverSetMaxDirectIOSize
+ * @see cuFileDriverSetMaxCacheSize
+ * @see cuFileDriverSetMaxPinnedMemSize
+ */
+CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t *props);
+/**
+ * @brief
+ * Sets whether the Read/Write APIs use polling to do IO operations
+ *
+ * @param  poll boolean to indicate whether to use poll mode or not
+ * @param  poll_threshold_size max IO size to use for POLLING mode in KB
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
+ *
+ * @warning This is an advanced command and should be tuned based on available system memory
+ *
+ * @see cuFileDriverGetProperties
+ */
+CUfileError_t cuFileDriverSetPollMode(bool poll, size_t poll_threshold_size);
+/**
+ * @brief
+ * Control parameter to set max IO size(KB) used by the library to talk to nvidia-fs driver
+ *
+ * @param  max_direct_io_size maximum allowed direct io size in KB
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
+ *
+ * @warning This is an advanced command and should be tuned based on available system memory
+ *
+ * @see cuFileDriverGetProperties
+ *
+ */
+CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size);
+/**
+ * @brief
+ * Control parameter to set maximum GPU memory reserved per device by the library for internal buffering
+ *
+ * @param  max_cache_size The maximum GPU buffer space per device used for internal use in KB
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
+ *
+ * @warning This is an advanced command and should be tuned based on supported GPU memory
+ *
+ * @see cuFileDriverGetProperties
+ */
+CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size);
+/**
+ * @brief
+ * Sets maximum buffer space that is pinned in KB for use by @ref cuFileBufRegister
+ *
+ * @param max_pinned_size maximum buffer space that is pinned in KB
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
+ * @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
+ *
+ * @warning This is an advanced command and should be tuned based on supported GPU memory
+ *
+ * @see cuFileDriverGetProperties
+ *
+ */
+CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size);
+//Experimental Batch API's
+typedef enum CUfileOpcode {
+		CUFILE_READ = 0,
+		CUFILE_WRITE
+}CUfileOpcode_t;
+typedef enum CUFILEStatus_enum {
+	CUFILE_WAITING = 0x000001,  /* required value prior to submission */
+	CUFILE_PENDING = 0x000002,  /* once enqueued */
+	CUFILE_INVALID = 0x000004,  /* request was ill-formed or could not be enqueued */
+	CUFILE_CANCELED = 0x000008, /* request successfully canceled */
+	CUFILE_COMPLETE = 0x0000010, /* request successfully completed */
+	CUFILE_TIMEOUT = 0x0000020,  /* request timed out */
+	CUFILE_FAILED  = 0x0000040  /* unable to complete */
+}CUfileStatus_t;
+typedef enum cufileBatchMode {
+	CUFILE_BATCH = 1,
+} CUfileBatchMode_t;
+typedef struct CUfileIOParams {
+	CUfileBatchMode_t mode; // Must be the very first field.
+	union {
+		struct  {
+			void *devPtr_base; //This can be a device memory or a host memory pointer.
+			off_t file_offset;
+			off_t devPtr_offset;
+			size_t size;
+		}batch;
+	}u;
+	CUfileHandle_t fh;
+	CUfileOpcode_t opcode;
+	void *cookie;
+}CUfileIOParams_t;
+/*
+ * Per-request result record of the batch API; cuFileBatchIOGetStatus takes a
+ * pointer to CUfileIOEvents_t.
+ * NOTE(review): cookie presumably echoes CUfileIOParams_t::cookie of the
+ * matching request - confirm.
+ * NOTE(review): ret is declared as unsigned size_t although its comment
+ * mentions negative errors; a negative value would be stored as a very large
+ * unsigned number, so callers presumably have to convert it to a signed type
+ * (e.g. ssize_t) before testing for an error - confirm.
+ */
+typedef struct CUfileIOEvents {
+	void *cookie;
+	CUfileStatus_t   status;      /* status of the operation */
+	size_t ret; /* -ve error or amount of I/O done. */
+}CUfileIOEvents_t;
+typedef void* CUfileBatchHandle_t;
+CUfileError_t cuFileBatchIOSetUp(CUfileBatchHandle_t *batch_idp, unsigned nr);
+CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t *iocbp, unsigned int flags);
+CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr,
+				CUfileIOEvents_t *iocbp, struct timespec* timeout);
+CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp);
+void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp);
+//Async API's with cuda streams
+// cuFile stream API registration flags
+// buffer pointer offset is set at submission time
+#define CU_FILE_STREAM_FIXED_BUF_OFFSET         1
+// file offset is set at submission time
+#define CU_FILE_STREAM_FIXED_FILE_OFFSET        2
+// file size is set at submission time
+#define CU_FILE_STREAM_FIXED_FILE_SIZE          4
+// size, offset and buffer offset are 4k aligned
+#define CU_FILE_STREAM_PAGE_ALIGNED_INPUTS      8
+/**
+ * @brief  asynchronous, cuda stream based variant of @ref cuFileRead
+ *
+ * @param fh The cuFile handle for the file.
+ * @param bufPtr_base  base address of buffer in device or host memory
+ * @param size_p  pointer to size in bytes to read
+ * @note  if the size is not known at the time of submission, *size_p must hold the max possible size for the I/O request.
+ * @param file_offset_p  pointer to file-offset from beginning of the file
+ * @param bufPtr_offset_p  pointer to offset relative to the bufPtr_base pointer to read into.
+ * @param bytes_read_p  pointer to the number of bytes that were successfully read.
+ * @param stream  cuda stream for the operation.
+ *
+ * NOTE(review): the function is declared to return CUfileError_t, while the
+ * @return lines below describe byte counts and -1; they look carried over
+ * from the synchronous API - confirm the actual return contract.
+ *
+ * @return  size of bytes successfully read in *bytes_read_p
+ * @return  -1 on error, in which case errno is set to indicate filesystem errors.
+ * @return  all other errors will return a negative integer value of @ref CUfileOpError enum value.
+ *
+ * @note  If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
+ * @note  This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
+ *        for cases where the BAR1 memory size is smaller than the size of the allocated memory.
+ * @note  If the stream is registered with cuFileStreamRegister, the IO setup and teardown overhead will be reduced.
+ * @note  on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
+ *
+ *
+ * @see cuFileBufRegister
+ * @see cuFileHandleRegister
+ * @see cuFileRead
+ * @see cuFileStreamRegister
+ * @see cuFileStreamDeregister
+ */
+CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void *bufPtr_base,
+                        size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_read_p, CUstream stream);
+/**
+ * @brief  asynchronous, cuda stream based variant of @ref cuFileWrite
+ *
+ * @param fh The cuFile handle for the file.
+ * @param bufPtr_base  base address of buffer in device or host memory
+ * @param size_p    pointer to size in bytes to write.
+ * @note  if the size is not known at the time of submission, *size_p must hold the max possible size for the I/O request.
+ * @param file_offset_p  pointer to file-offset from beginning of the file
+ * @param bufPtr_offset_p  pointer to offset relative to the bufPtr_base pointer to write from.
+ * @param bytes_written_p pointer to the number of bytes that were successfully written.
+ * @param stream  cuda stream for the operation.
+ *
+ * NOTE(review): the function is declared to return CUfileError_t, while the
+ * @return lines below describe byte counts and -1; they look carried over
+ * from the synchronous API - confirm the actual return contract.
+ *
+ * @return  size of bytes successfully written in *bytes_written_p
+ * @return  -1 on error, in which case errno is set to indicate filesystem errors.
+ * @return  all other errors will return a negative integer value of @ref CUfileOpError enum value.
+ *
+ * @note  If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
+ * @note  This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
+ *        for cases where the BAR1 memory size is smaller than the size of the allocated memory.
+ * @note  If the stream is registered with cuFileStreamRegister prior to this call, the IO setup and teardown overhead will be reduced.
+ * @note  on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
+ *
+ * @see cuFileBufRegister
+ * @see cuFileHandleRegister
+ * @see cuFileWrite
+ * @see cuFileStreamRegister
+ * @see cuFileStreamDeregister
+ */
+CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void *bufPtr_base,
+                        size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_written_p, CUstream stream);
+/**
+ * @brief  register a cuda stream with cuFile for asynchronous cuFile operations
+ *
+ * @param stream  cuda stream for the operation.
+ * @param flags  flags for the stream to improve the stream execution of IO based on input parameters.
+ * @note  supported FLAGS are
+ * @note CU_FILE_STREAM_FIXED_BUF_OFFSET - buffer pointer offset is set at submission time
+ * @note CU_FILE_STREAM_FIXED_FILE_OFFSET - file offset is set at submission time
+ * @note CU_FILE_STREAM_FIXED_FILE_SIZE  - file size is set at submission time
+ * @note CU_FILE_STREAM_PAGE_ALIGNED_INPUTS - size, offset and buffer offset are 4k aligned
+ *
+ * @note  allocates resources needed to support cuFile operations asynchronously for the cuda stream
+ * @note  This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
+ *        for cases where the BAR1 memory size is smaller than the size of the allocated memory.
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
+ * @return CU_FILE_INVALID_VALUE if the stream is invalid
+ *
+ * @see cuFileReadAsync
+ * @see cuFileWriteAsync
+ * @see cuFileStreamDeregister
+ */
+CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags);
+/**
+ * @brief  deregister a cuda stream from cuFile
+ *
+ * @param stream  cuda stream for the operation.
+ *
+ * @note  deallocates resources used by previous cuFile asynchronous operations for the cuda stream
+ * @note  highly recommend to call after cuda stream errors to release any outstanding cuFile resources for this stream
+ * @note  must be called before cuStreamDestroy call for the specified stream.
+ * @note  This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
+ *        for cases where the BAR1 memory size is smaller than the size of the allocated memory.
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
+ * @return CU_FILE_INVALID_VALUE if the stream is invalid
+ *
+ * @see cuFileReadAsync
+ * @see cuFileWriteAsync
+ * @see cuFileStreamRegister
+ */
+CUfileError_t cuFileStreamDeregister(CUstream stream);
+/**
+ * @brief  returns the cufile library version.
+ *
+ * @param version  pointer that receives the library version.
+ *
+ * The version is returned as (1000 * major + 10 * minor).
+ * For example, CUFILE 1.7.0 would be represented by 1070.
+ * @note  This is useful for applications that need to inquire the library.
+ *
+ * @return CU_FILE_SUCCESS on success
+ * @return CU_FILE_INVALID_VALUE if the input parameter is null.
+ * @return CU_FILE_DRIVER_VERSION_READ_ERROR if the version is not available.
+ *
+ */
+CUfileError_t cuFileGetVersion(int *version);
+#pragma GCC visibility pop
+/// @cond DOXYGEN_SKIP_MACRO
+#endif // CUFILE_H
+/// @endcond
+#ifdef __cplusplus
+}
+#endif

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__init__.py ADDED Viewed

File without changes

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (209 Bytes). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/cufile/lib/libcufile_rdma.so.1 ADDED Viewed

Binary file (46.5 kB). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (205 Bytes). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__init__.py ADDED Viewed

File without changes

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (213 Bytes). View file

URSA/.venv_ursa/lib/python3.12/site-packages/nvidia/curand/include/curand.h ADDED Viewed

	@@ -0,0 +1,1080 @@

+ /* Copyright 2010-2014 NVIDIA Corporation.  All rights reserved.
+  *
+  * NOTICE TO LICENSEE:
+  *
+  * The source code and/or documentation ("Licensed Deliverables") are
+  * subject to NVIDIA intellectual property rights under U.S. and
+  * international Copyright laws.
+  *
+  * The Licensed Deliverables contained herein are PROPRIETARY and
+  * CONFIDENTIAL to NVIDIA and are being provided under the terms and
+  * conditions of a form of NVIDIA software license agreement by and
+  * between NVIDIA and Licensee ("License Agreement") or electronically
+  * accepted by Licensee.  Notwithstanding any terms or conditions to
+  * the contrary in the License Agreement, reproduction or disclosure
+  * of the Licensed Deliverables to any third party without the express
+  * written consent of NVIDIA is prohibited.
+  *
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE
+  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+  * OF THESE LICENSED DELIVERABLES.
+  *
+  * U.S. Government End Users.  These Licensed Deliverables are a
+  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+  * 1995), consisting of "commercial computer software" and "commercial
+  * computer software documentation" as such terms are used in 48
+  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
+  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+  * U.S. Government End Users acquire the Licensed Deliverables with
+  * only those rights set forth herein.
+  *
+  * Any use of the Licensed Deliverables in individual and commercial
+  * software must include, in the user documentation and internal
+  * comments to the code, the above Disclaimer and U.S. Government End
+  * Users Notice.
+  */
+#if !defined(CURAND_H_)
+#define CURAND_H_
+/**
+ * \defgroup HOST Host API
+ *
+ * @{
+ */
+#ifndef __CUDACC_RTC__
+#include <cuda_runtime.h>
+#endif
+#ifndef CURANDAPI
+#ifdef _WIN32
+#define CURANDAPI __stdcall
+#else
+#define CURANDAPI
+#endif
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+#define CURAND_VER_MAJOR 10
+#define CURAND_VER_MINOR 3
+#define CURAND_VER_PATCH 9
+#define CURAND_VER_BUILD 90
+#define CURAND_VERSION (CURAND_VER_MAJOR * 1000 + \
+                        CURAND_VER_MINOR *  100 + \
+                        CURAND_VER_PATCH)
+/* CURAND Host API datatypes */
+/**
+ * @{
+ */
+/**
+ * CURAND function call status types
+ */
+enum curandStatus {
+    CURAND_STATUS_SUCCESS = 0, ///< No errors
+    CURAND_STATUS_VERSION_MISMATCH = 100, ///< Header file and linked library version do not match
+    CURAND_STATUS_NOT_INITIALIZED = 101, ///< Generator not initialized
+    CURAND_STATUS_ALLOCATION_FAILED = 102, ///< Memory allocation failed
+    CURAND_STATUS_TYPE_ERROR = 103, ///< Generator is wrong type
+    CURAND_STATUS_OUT_OF_RANGE = 104, ///< Argument out of range
+    CURAND_STATUS_LENGTH_NOT_MULTIPLE = 105, ///< Length requested is not a multiple of dimension
+    CURAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106, ///< GPU does not have double precision required by MRG32k3a
+    CURAND_STATUS_LAUNCH_FAILURE = 201, ///< Kernel launch failure
+    CURAND_STATUS_PREEXISTING_FAILURE = 202, ///< Preexisting failure on library entry
+    CURAND_STATUS_INITIALIZATION_FAILED = 203, ///< Initialization of CUDA failed
+    CURAND_STATUS_ARCH_MISMATCH = 204, ///< Architecture mismatch, GPU does not support requested feature
+    CURAND_STATUS_INTERNAL_ERROR = 999 ///< Internal library error
+};
+/*
+ * CURAND function call status types
+*/
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandStatus curandStatus_t;
+/** \endcond */
+/**
+ * CURAND generator types
+ */
+enum curandRngType {
+    CURAND_RNG_TEST = 0,
+    CURAND_RNG_PSEUDO_DEFAULT = 100, ///< Default pseudorandom generator
+    CURAND_RNG_PSEUDO_XORWOW = 101, ///< XORWOW pseudorandom generator
+    CURAND_RNG_PSEUDO_MRG32K3A = 121, ///< MRG32k3a pseudorandom generator
+    CURAND_RNG_PSEUDO_MTGP32 = 141, ///< Mersenne Twister MTGP32 pseudorandom generator
+    CURAND_RNG_PSEUDO_MT19937 = 142, ///< Mersenne Twister MT19937 pseudorandom generator
+    CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161, ///< PHILOX-4x32-10 pseudorandom generator
+    CURAND_RNG_QUASI_DEFAULT = 200, ///< Default quasirandom generator
+    CURAND_RNG_QUASI_SOBOL32 = 201, ///< Sobol32 quasirandom generator
+    CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202,  ///< Scrambled Sobol32 quasirandom generator
+    CURAND_RNG_QUASI_SOBOL64 = 203, ///< Sobol64 quasirandom generator
+    CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204  ///< Scrambled Sobol64 quasirandom generator
+};
+/*
+ * CURAND generator types
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandRngType curandRngType_t;
+/** \endcond */
+/**
+ * CURAND ordering of results in memory
+ */
+enum curandOrdering {
+    CURAND_ORDERING_PSEUDO_BEST = 100, ///< Best ordering for pseudorandom results
+    CURAND_ORDERING_PSEUDO_DEFAULT = 101, ///< Specific default thread sequence for pseudorandom results, same as CURAND_ORDERING_PSEUDO_BEST
+    CURAND_ORDERING_PSEUDO_SEEDED = 102, ///< Specific seeding pattern for fast lower quality pseudorandom results
+    CURAND_ORDERING_PSEUDO_LEGACY = 103, ///< Specific legacy sequence for pseudorandom results, guaranteed to remain the same for all cuRAND releases
+    CURAND_ORDERING_PSEUDO_DYNAMIC = 104, ///< Specific ordering adjusted to the device it is being executed on, provides the best performance
+    CURAND_ORDERING_QUASI_DEFAULT = 201 ///< Specific n-dimensional ordering for quasirandom results
+};
+/*
+ * CURAND ordering of results in memory
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandOrdering curandOrdering_t;
+/** \endcond */
+/**
+ * CURAND choice of direction vector set
+ */
+enum curandDirectionVectorSet {
+    CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
+    CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102, ///< Specific set of 32-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
+    CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 103, ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions
+    CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104 ///< Specific set of 64-bit direction vectors generated from polynomials recommended by S. Joe and F. Y. Kuo, for up to 20,000 dimensions, and scrambled
+};
+/*
+ * CURAND choice of direction vector set
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef enum curandDirectionVectorSet curandDirectionVectorSet_t;
+/** \endcond */
+/**
+ * CURAND array of 32-bit direction vectors
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef unsigned int curandDirectionVectors32_t[32];
+/** \endcond */
+ /**
+ * CURAND array of 64-bit direction vectors
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef unsigned long long curandDirectionVectors64_t[64];
+/** \endcond **/
+/**
+ * CURAND generator (opaque)
+ */
+struct curandGenerator_st;
+/**
+ * CURAND generator
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef struct curandGenerator_st *curandGenerator_t;
+/** \endcond */
+/**
+ * CURAND distribution
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef double curandDistribution_st;
+typedef curandDistribution_st *curandDistribution_t;
+typedef struct curandDistributionShift_st *curandDistributionShift_t;
+/** \endcond */
+/**
+ * CURAND distribution M2
+ */
+/** \cond UNHIDE_TYPEDEFS */
+typedef struct curandDistributionM2Shift_st *curandDistributionM2Shift_t;
+typedef struct curandHistogramM2_st *curandHistogramM2_t;
+typedef unsigned int curandHistogramM2K_st;
+typedef curandHistogramM2K_st *curandHistogramM2K_t;
+typedef curandDistribution_st curandHistogramM2V_st;
+typedef curandHistogramM2V_st *curandHistogramM2V_t;
+typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t;
+/** \endcond */
+/*
+ * CURAND METHOD
+ */
+/** \cond UNHIDE_ENUMS */
+enum curandMethod {
+    CURAND_CHOOSE_BEST = 0, // choose best depends on args
+    CURAND_ITR = 1,
+    CURAND_KNUTH = 2,
+    CURAND_HITR = 3,
+    CURAND_M1 = 4,
+    CURAND_M2 = 5,
+    CURAND_BINARY_SEARCH = 6,
+    CURAND_DISCRETE_GAUSS = 7,
+    CURAND_REJECTION = 8,
+    CURAND_DEVICE_API = 9,
+    CURAND_FAST_REJECTION = 10,
+    CURAND_3RD = 11,
+    CURAND_DEFINITION = 12,
+    CURAND_POISSON = 13
+};
+typedef enum curandMethod curandMethod_t;
+/** \endcond */
+#ifndef __CUDACC_RTC__
+/**
+ * @}
+ */
+/**
+ * \brief Create new random number generator.
+ *
+ * Creates a new random number generator of type \p rng_type
+ * and returns it in \p *generator.
+ *
+ * Legal values for \p rng_type are:
+ * - CURAND_RNG_PSEUDO_DEFAULT
+ * - CURAND_RNG_PSEUDO_XORWOW
+ * - CURAND_RNG_PSEUDO_MRG32K3A
+ * - CURAND_RNG_PSEUDO_MTGP32
+ * - CURAND_RNG_PSEUDO_MT19937
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
+ * - CURAND_RNG_QUASI_DEFAULT
+ * - CURAND_RNG_QUASI_SOBOL32
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32
+ * - CURAND_RNG_QUASI_SOBOL64
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
+ *
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
+ * is CURAND_RNG_PSEUDO_XORWOW.  \n
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * \param generator - Pointer to generator
+ * \param rng_type - Type of generator to create
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED, if memory could not be allocated \n
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
+ *   dynamically linked library version \n
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
+ *
+ */
+curandStatus_t CURANDAPI
+curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type);
+/**
+ * \brief Create new host CPU random number generator.
+ *
+ * Creates a new host CPU random number generator of type \p rng_type
+ * and returns it in \p *generator.
+ *
+ * Legal values for \p rng_type are:
+ * - CURAND_RNG_PSEUDO_DEFAULT
+ * - CURAND_RNG_PSEUDO_XORWOW
+ * - CURAND_RNG_PSEUDO_MRG32K3A
+ * - CURAND_RNG_PSEUDO_MTGP32
+ * - CURAND_RNG_PSEUDO_MT19937
+ * - CURAND_RNG_PSEUDO_PHILOX4_32_10
+ * - CURAND_RNG_QUASI_DEFAULT
+ * - CURAND_RNG_QUASI_SOBOL32
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL32
+ * - CURAND_RNG_QUASI_SOBOL64
+ * - CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
+ *
+ * When \p rng_type is CURAND_RNG_PSEUDO_DEFAULT, the type chosen
+ * is CURAND_RNG_PSEUDO_XORWOW.  \n
+ * When \p rng_type is CURAND_RNG_QUASI_DEFAULT,
+ * the type chosen is CURAND_RNG_QUASI_SOBOL32.
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_XORWOW are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MRG32K3A are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MTGP32 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_MT19937 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10 are:
+ * - \p seed = 0
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_PSEUDO_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * The default values for \p rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 are:
+ * - \p dimensions = 1
+ * - \p offset = 0
+ * - \p ordering = CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * \param generator - Pointer to generator
+ * \param rng_type - Type of generator to create
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
+ * - CURAND_STATUS_VERSION_MISMATCH if the header file version does not match the
+ *   dynamically linked library version \n
+ * - CURAND_STATUS_TYPE_ERROR if the value for \p rng_type is invalid \n
+ * - CURAND_STATUS_SUCCESS if generator was created successfully \n
+ */
+curandStatus_t CURANDAPI
+curandCreateGeneratorHost(curandGenerator_t *generator, curandRngType_t rng_type);
+/**
+ * \brief Destroy an existing generator.
+ *
+ * Destroy an existing generator and free all memory associated with its state.
+ *
+ * \param generator - Generator to destroy
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_SUCCESS if generator was destroyed successfully \n
+ */
+curandStatus_t CURANDAPI
+curandDestroyGenerator(curandGenerator_t generator);
+/**
+ * \brief Return the version number of the library.
+ *
+ * Return in \p *version the version number of the dynamically linked CURAND
+ * library.  The format is the same as CUDART_VERSION from the CUDA Runtime.
+ * The only supported configuration is CURAND version equal to CUDA Runtime
+ * version.
+ *
+ * \param version - CURAND library version
+ *
+ * \return
+ * - CURAND_STATUS_SUCCESS if the version number was successfully returned \n
+ */
+curandStatus_t CURANDAPI
+curandGetVersion(int *version);
+/**
+* \brief Return the value of the curand property.
+*
+* Return in \p *value the number for the property described by \p type of the
+* dynamically linked CURAND library.
+*
+* \param type - CUDA library property
+* \param value - integer value for the requested property
+*
+* \return
+* - CURAND_STATUS_SUCCESS if the property value was successfully returned \n
+* - CURAND_STATUS_OUT_OF_RANGE if the property type is not recognized \n
+*/
+curandStatus_t CURANDAPI
+curandGetProperty(libraryPropertyType type, int *value);
+/**
+ * \brief Set the current stream for CURAND kernel launches.
+ *
+ * Set the current stream for CURAND kernel launches.  All library functions
+ * will use this stream until set again.
+ *
+ * \param generator - Generator to modify
+ * \param stream - Stream to use or ::NULL for null stream
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_SUCCESS if stream was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetStream(curandGenerator_t generator, cudaStream_t stream);
+/**
+ * \brief Set the seed value of the pseudo-random number generator.
+ *
+ * Set the seed value of the pseudorandom number generator.
+ * All values of seed are valid.  Different seeds will produce different sequences.
+ * Different seeds will often not be statistically correlated with each other,
+ * but some pairs of seed values may generate sequences which are statistically correlated.
+ *
+ * \param generator - Generator to modify
+ * \param seed - Seed value
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a pseudorandom number generator \n
+ * - CURAND_STATUS_SUCCESS if generator seed was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetPseudoRandomGeneratorSeed(curandGenerator_t generator, unsigned long long seed);
+/**
+ * \brief Set the absolute offset of the pseudo or quasirandom number generator.
+ *
+ * Set the absolute offset of the pseudo or quasirandom number generator.
+ *
+ * All values of offset are valid.  The offset position is absolute, not
+ * relative to the current position in the sequence.
+ *
+ * \param generator - Generator to modify
+ * \param offset - Absolute offset position
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_SUCCESS if generator offset was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetGeneratorOffset(curandGenerator_t generator, unsigned long long offset);
+/**
+ * \brief Set the ordering of results of the pseudo or quasirandom number generator.
+ *
+ * Set the ordering of results of the pseudo or quasirandom number generator.
+ *
+ * Legal values of \p order for pseudorandom generators are:
+ * - CURAND_ORDERING_PSEUDO_DEFAULT
+ * - CURAND_ORDERING_PSEUDO_BEST
+ * - CURAND_ORDERING_PSEUDO_SEEDED
+ * - CURAND_ORDERING_PSEUDO_LEGACY
+ *
+ * Legal values of \p order for quasirandom generators are:
+ * - CURAND_ORDERING_QUASI_DEFAULT
+ *
+ * \param generator - Generator to modify
+ * \param order - Ordering of results
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_OUT_OF_RANGE if the ordering is not valid \n
+ * - CURAND_STATUS_SUCCESS if generator ordering was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order);
+/**
+ * \brief Set the number of dimensions.
+ *
+ * Set the number of dimensions to be generated by the quasirandom number
+ * generator.
+ *
+ * Legal values for \p num_dimensions are 1 to 20000.
+ *
+ * \param generator - Generator to modify
+ * \param num_dimensions - Number of dimensions
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_OUT_OF_RANGE if num_dimensions is not valid \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a quasirandom number generator \n
+ * - CURAND_STATUS_SUCCESS if the number of dimensions was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions);
+/**
+ * \brief Generate 32-bit pseudo or quasirandom numbers.
+ *
+ * Use \p generator to generate \p num 32-bit results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit values with every bit random.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of random 32-bit values to generate
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *     a previous kernel launch \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is a 64 bit quasirandom generator
+ *    (use ::curandGenerateLongLong() with 64 bit quasirandom generators) \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerate(curandGenerator_t generator, unsigned int *outputPtr, size_t num);
+/**
+ * \brief Generate 64-bit quasirandom numbers.
+ *
+ * Use \p generator to generate \p num 64-bit results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit values with every bit random.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of random 64-bit values to generate
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *     a previous kernel launch \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_TYPE_ERROR if the generator is not a 64 bit quasirandom generator\n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateLongLong(curandGenerator_t generator, unsigned long long *outputPtr, size_t num);
+/**
+ * \brief Generate uniformly distributed floats.
+ *
+ * Use \p generator to generate \p num float results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit floating point values between \p 0.0f and \p 1.0f,
+ * excluding \p 0.0f and including \p 1.0f.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of floats to generate
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateUniform(curandGenerator_t generator, float *outputPtr, size_t num);
+/**
+ * \brief Generate uniformly distributed doubles.
+ *
+ * Use \p generator to generate \p num double results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit double precision floating point values between
+ * \p 0.0 and \p 1.0, excluding \p 0.0 and including \p 1.0.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param num - Number of doubles to generate
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateUniformDouble(curandGenerator_t generator, double *outputPtr, size_t num);
+/**
+ * \brief Generate normally distributed floats.
+ *
+ * Use \p generator to generate \p n float results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit floating point values with mean \p mean and standard
+ * deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of floats to generate
+ * \param mean - Mean of normal distribution
+ * \param stddev - Standard deviation of normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateNormal(curandGenerator_t generator, float *outputPtr,
+                     size_t n, float mean, float stddev);
+/**
+ * \brief Generate normally distributed doubles.
+ *
+ * Use \p generator to generate \p n double results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit floating point values with mean \p mean and standard
+ * deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of doubles to generate
+ * \param mean - Mean of normal distribution
+ * \param stddev - Standard deviation of normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateNormalDouble(curandGenerator_t generator, double *outputPtr,
+                     size_t n, double mean, double stddev);
+/**
+ * \brief Generate log-normally distributed floats.
+ *
+ * Use \p generator to generate \p n float results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit floating point values with log-normal distribution based on
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ * The normally distributed results are transformed into log-normal distribution.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of floats to generate
+ * \param mean - Mean of associated normal distribution
+ * \param stddev - Standard deviation of associated normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateLogNormal(curandGenerator_t generator, float *outputPtr,
+                     size_t n, float mean, float stddev);
+/**
+ * \brief Generate log-normally distributed doubles.
+ *
+ * Use \p generator to generate \p n double results into the device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 64-bit floating point values with log-normal distribution based on
+ * an associated normal distribution with mean \p mean and standard deviation \p stddev.
+ *
+ * Normally distributed results are generated from pseudorandom generators
+ * with a Box-Muller transform, and so require \p n to be even.
+ * Quasirandom generators use an inverse cumulative distribution
+ * function to preserve dimensionality.
+ * The normally distributed results are transformed into log-normal distribution.
+ *
+ * There may be slight numerical differences between results generated
+ * on the GPU with generators created with ::curandCreateGenerator()
+ * and results calculated on the CPU with generators created with
+ * ::curandCreateGeneratorHost().  These differences arise because of
+ * differences in results for transcendental functions.  In addition,
+ * future versions of CURAND may use newer versions of the CUDA math
+ * library, so different versions of CURAND may give slightly different
+ * numerical values.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of doubles to generate
+ * \param mean - Mean of associated normal distribution
+ * \param stddev - Standard deviation of associated normal distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension, or is not a multiple
+ *    of two for pseudorandom generators \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr,
+                     size_t n, double mean, double stddev);
+/**
+ * \brief Construct the histogram array for a Poisson distribution.
+ *
+ * Construct the histogram array for the Poisson distribution with lambda \p lambda.
+ * For lambda greater than 2000, an approximation with a normal distribution is used.
+ *
+ * \param lambda - lambda for the Poisson distribution
+ *
+ *
+ * \param discrete_distribution - pointer to the histogram in device memory
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU does not support double precision \n
+ * - CURAND_STATUS_INITIALIZATION_FAILED if there was a problem setting up the GPU \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the distribution pointer was null \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
+ * - CURAND_STATUS_SUCCESS if the histogram was generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution);
+/**
+ * \brief Destroy the histogram array for a discrete distribution (e.g. Poisson).
+ *
+ * Destroy the histogram array for a discrete distribution created by curandCreatePoissonDistribution.
+ *
+ * \param discrete_distribution - pointer to device memory where the histogram is stored
+ *
+ * \return
+ * - CURAND_STATUS_NOT_INITIALIZED if the histogram was never created \n
+ * - CURAND_STATUS_SUCCESS if the histogram was destroyed successfully \n
+ */
+curandStatus_t CURANDAPI
+curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution);
+/**
+ * \brief Generate Poisson-distributed unsigned ints.
+ *
+ * Use \p generator to generate \p n unsigned int results into device memory at
+ * \p outputPtr.  The device memory must have been previously allocated and must be
+ * large enough to hold all the results.  Launches are done with the stream
+ * set using ::curandSetStream(), or the null stream if no stream has been set.
+ *
+ * Results are 32-bit unsigned int values with Poisson distribution, with lambda \p lambda.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to device memory to store CUDA-generated results, or
+ *                 Pointer to host memory to store CPU-generated results
+ * \param n - Number of unsigned ints to generate
+ * \param lambda - lambda for the Poisson distribution
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *    a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_LENGTH_NOT_MULTIPLE if the number of output samples is
+ *    not a multiple of the quasirandom dimension\n
+ * - CURAND_STATUS_DOUBLE_PRECISION_REQUIRED if the GPU or sm does not support double precision \n
+ * - CURAND_STATUS_OUT_OF_RANGE if lambda is non-positive or greater than 400,000 \n
+ * - CURAND_STATUS_SUCCESS if the results were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGeneratePoisson(curandGenerator_t generator, unsigned int *outputPtr,
+                     size_t n, double lambda);
+/**
+ * \brief Internal-use variant of ::curandGeneratePoisson() taking an explicit method.
+ *
+ * Intended for internal usage only.  The signature matches
+ * ::curandGeneratePoisson() with one extra \p method argument.
+ * NOTE(review): the remaining parameters presumably carry the same meaning
+ * as in ::curandGeneratePoisson() -- confirm before relying on this.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to memory that receives the unsigned int results
+ * \param n - Number of unsigned ints to generate (presumably)
+ * \param lambda - lambda for the Poisson distribution (presumably)
+ * \param method - Method selector of type ::curandMethod_t; accepted values
+ *                 are not documented in this header section
+ *
+ * \return
+ * - A ::curandStatus_t status code; the specific codes are not documented here
+ */
+curandStatus_t CURANDAPI
+curandGeneratePoissonMethod(curandGenerator_t generator, unsigned int *outputPtr,
+                     size_t n, double lambda, curandMethod_t method);
+/**
+ * \brief Generate binomially distributed unsigned ints.
+ *
+ * NOTE(review): this declaration carried no documentation.  The description
+ * below is inferred from the function name and signature only and should be
+ * confirmed against the CURAND reference before being relied upon.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to memory that receives the unsigned int results
+ * \param num - Number of results to generate (presumably)
+ * \param n - Presumably the number of trials of the binomial distribution
+ * \param p - Presumably the success probability of each trial
+ *
+ * \return
+ * - A ::curandStatus_t status code; the specific codes are not documented here
+ */
+curandStatus_t CURANDAPI
+curandGenerateBinomial(curandGenerator_t generator, unsigned int *outputPtr,
+                       size_t num, unsigned int n, double p);
+/**
+ * \brief Internal-use variant of ::curandGenerateBinomial() taking an explicit method.
+ *
+ * Intended for internal usage only.  The signature matches
+ * ::curandGenerateBinomial() with one extra \p method argument.
+ * NOTE(review): the meaning of the distribution parameters is inferred from
+ * their names only -- confirm before relying on this.
+ *
+ * \param generator - Generator to use
+ * \param outputPtr - Pointer to memory that receives the unsigned int results
+ * \param num - Number of results to generate (presumably)
+ * \param n - Presumably the number of trials of the binomial distribution
+ * \param p - Presumably the success probability of each trial
+ * \param method - Method selector of type ::curandMethod_t; accepted values
+ *                 are not documented in this header section
+ *
+ * \return
+ * - A ::curandStatus_t status code; the specific codes are not documented here
+ */
+curandStatus_t CURANDAPI
+curandGenerateBinomialMethod(curandGenerator_t generator,
+                             unsigned int *outputPtr,
+                             size_t num, unsigned int n, double p,
+                             curandMethod_t method);
+/**
+ * \brief Setup starting states.
+ *
+ * Generate the starting state of the generator.  This function is
+ * automatically called by generation functions such as
+ * ::curandGenerate() and ::curandGenerateUniform().
+ * It can be called manually for performance testing reasons to separate
+ * timings for starting state generation and random number generation.
+ *
+ * \param generator - Generator to update
+ *
+ * \return
+ * - CURAND_STATUS_ALLOCATION_FAILED if memory could not be allocated \n
+ * - CURAND_STATUS_NOT_INITIALIZED if the generator was never created \n
+ * - CURAND_STATUS_PREEXISTING_FAILURE if there was an existing error from
+ *     a previous kernel launch \n
+ * - CURAND_STATUS_LAUNCH_FAILURE if the kernel launch failed for any reason \n
+ * - CURAND_STATUS_SUCCESS if the seeds were generated successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGenerateSeeds(curandGenerator_t generator);
+/**
+ * \brief Get direction vectors for 32-bit quasirandom number generation.
+ *
+ * Get a pointer to an array of direction vectors that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of direction vectors in host memory.
+ *
+ * The array contains vectors for many dimensions.  Each dimension
+ * has 32 vectors.  Each individual vector is an unsigned int.
+ *
+ * Legal values for \p set are:
+ * - CURAND_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 (20,000 dimensions)
+ *
+ * \param vectors - Address of pointer in which to return direction vectors
+ * \param set - Which set of direction vectors to use
+ *
+ * \return
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetDirectionVectors32(curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set);
+/**
+ * \brief Get scramble constants for 32-bit scrambled Sobol' generators.
+ *
+ * Get a pointer to an array of scramble constants that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of unsigned ints in host memory.
+ *
+ * The array contains constants for many dimensions.  Each dimension
+ * has a single unsigned int constant.
+ *
+ * \param constants - Address of pointer in which to return scramble constants
+ *
+ * \return
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetScrambleConstants32(unsigned int * * constants);
+/**
+ * \brief Get direction vectors for 64-bit quasirandom number generation.
+ *
+ * Get a pointer to an array of direction vectors that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of direction vectors in host memory.
+ *
+ * The array contains vectors for many dimensions.  Each dimension
+ * has 64 vectors.  Each individual vector is an unsigned long long.
+ *
+ * Legal values for \p set are:
+ * - CURAND_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
+ * - CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 (20,000 dimensions)
+ *
+ * \param vectors - Address of pointer in which to return direction vectors
+ * \param set - Which set of direction vectors to use
+ *
+ * \return
+ * - CURAND_STATUS_OUT_OF_RANGE if the choice of set is invalid \n
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetDirectionVectors64(curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set);
+/**
+ * \brief Get scramble constants for 64-bit scrambled Sobol' generators.
+ *
+ * Get a pointer to an array of scramble constants that can be used
+ * for quasirandom number generation.  The resulting pointer will
+ * reference an array of unsigned long longs in host memory.
+ *
+ * The array contains constants for many dimensions.  Each dimension
+ * has a single unsigned long long constant.
+ *
+ * \param constants - Address of pointer in which to return scramble constants
+ *
+ * \return
+ * - CURAND_STATUS_SUCCESS if the pointer was set successfully \n
+ */
+curandStatus_t CURANDAPI
+curandGetScrambleConstants64(unsigned long long * * constants);
+/** @} */
+#endif // __CUDACC_RTC__
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+#endif /* !defined(CURAND_H_) */