diff --git a/.gitattributes b/.gitattributes index 628226a30d9d140532e3c2070459358042cb3ccc..d9a271db913abcc1d02f3cd71f16d969920eff45 100644 --- a/.gitattributes +++ b/.gitattributes @@ -947,3 +947,239 @@ lib/python3.10/site-packages/sklearn/neighbors/_kd_tree.cpython-310-x86_64-linux lib/python3.10/site-packages/sklearn/utils/_seq_dataset.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text lib/python3.10/site-packages/sklearn/utils/_typedefs.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text lib/python3.10/site-packages/sklearn/utils/_vector_sentinel.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +doc/pdf/Cupti.pdf filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/groupby/__pycache__/groupby.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/io/__pycache__/pytables.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/io/__pycache__/stata.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/io/formats/__pycache__/style.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/cpu/open3d_tf_ops.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/cpu/open3d_torch_ops.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/cuda/open3d_tf_ops.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/cuda/open3d_torch_ops.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/hall_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/hall_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/konzerthaus_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/konzerthaus_ibl.ktx filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/open3d/resources/nightlights_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/nightlights_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/park2_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/park2_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/park_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/park_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/pillars_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/pillars_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/Roboto-Bold.ttf filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/Roboto-BoldItalic.ttf filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/Roboto-Medium.ttf filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/Roboto-MediumItalic.ttf filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/streetlamp_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/streetlamp_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/brightday_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/brightday_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/crossroads_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/crossroads_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/defaultLitSSR.filamat filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/open3d/resources/defaultLit.filamat filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/defaultLitTransparency.filamat filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/default_skybox.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/resources/default_ibl.ktx filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/cpu/pybind.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/__pycache__/frame.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/__pycache__/generic.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/__pycache__/series.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/_multiarray_umath.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/_multiarray_tests.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/_simd.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/lib/libnpyrandom.a filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/linalg/_umath_linalg.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/bit_generator.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/mtrand.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/_bounded_integers.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/_common.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/numpy/random/_generator.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/_mt19937.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/_pcg64.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/random/_philox.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/open3d/cuda/pybind.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/lib/__pycache__/function_base.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/ma/__pycache__/core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/zmq/backend/cython/_zmq.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/__pycache__/fromnumeric.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/lib/libnpymath.a filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/lib2to3/tests/__pycache__/test_fixers.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/pydoc_data/__pycache__/topics.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/tkinter/__pycache__/__init__.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/html/__pycache__/entities.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/ensurepip/_bundled/pip-23.0.1-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text +lib/python3.10/ensurepip/_bundled/setuptools-65.5.0-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/sparsefuncs_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/_cython_blas.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/_fast_dict.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/_isfinite.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/_random.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/arrayfuncs.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/murmurhash.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/_loss/_loss.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/cluster/_hierarchical_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/cluster/_k_means_common.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/cluster/_k_means_elkan.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/cluster/_k_means_lloyd.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/sklearn/cluster/_k_means_minibatch.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/datasets/_svmlight_format_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/decomposition/_cdnmf_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/decomposition/_online_lda_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/ensemble/_gradient_boosting.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/linear_model/_cd_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/linear_model/_sag_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/linear_model/_sgd_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/manifold/_barnes_hut_tsne.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/datasets/images/china.jpg filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/datasets/images/flower.jpg filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/ensemble/_hist_gradient_boosting/_predictor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/ensemble/__pycache__/_forest.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/cluster/_hdbscan/_linkage.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/cluster/_hdbscan/_reachability.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/sklearn/cluster/_hdbscan/_tree.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/utils/__pycache__/estimator_checks.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/preprocessing/__pycache__/_data.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/cluster/_expected_mutual_info_fast.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_argkmin.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_base.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors_classmode.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/metrics/__pycache__/_classification.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/sklearn/ensemble/_hist_gradient_boosting/histogram.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/sklearn/ensemble/_hist_gradient_boosting/splitting.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/_C/libproton.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/google/_upb/_message.abi3.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/google/protobuf/__pycache__/descriptor_pb2.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/backends/nvidia/bin/cuobjdump filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/backends/nvidia/bin/nvdisasm filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/backends/nvidia/bin/ptxas filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/backends/nvidia/lib/libdevice.10.bc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/backends/amd/lib/ocml.bc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/backends/amd/lib/ockl.bc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/traitlets/__pycache__/traitlets.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/triton/_C/libtriton.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/tornado/__pycache__/web.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/tornado/test/__pycache__/web_test.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libc10.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libc10_cuda.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn_cnn.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn_adv.so.9 filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/torch/lib/libcudnn_engines_runtime_compiled.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn_graph.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcupti-ae79a72e.so.11.8 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcusparseLt-f8b4a9fb.so.0 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libtorch.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libtorch_python.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libtorch_cpu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn_heuristic.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn_ops.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libtorch_cuda_linalg.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/__pycache__/_meta_registrations.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/__pycache__/_tensor_docs.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/__pycache__/_torch_docs.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/__pycache__/overrides.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libcudnn_engines_precompiled.so.9 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/bin/protoc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/bin/protoc-3.13.0.0 filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/lib/libtorch_cuda.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/utils/hipify/__pycache__/cuda_to_hip_mappings.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/distributed/__pycache__/distributed_test.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/distributed/rpc/__pycache__/rpc_test.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/sparse/__pycache__/_triton_ops_meta.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/__pycache__/common_methods_invocations.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/__pycache__/common_nn.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/__pycache__/common_quantization.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/__pycache__/common_utils.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/testing/_internal/generated/__pycache__/annotated_fn_args.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/nn/__pycache__/functional.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/onnx/__pycache__/symbolic_opset9.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/linalg/__pycache__/__init__.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/_inductor/__pycache__/ir.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/_inductor/__pycache__/lowering.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/torch/_inductor/__pycache__/scheduler.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/_inductor/codegen/__pycache__/cpp.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/_dynamo/__pycache__/trace_rules.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/_decomp/__pycache__/decompositions.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/torch/_refs/__pycache__/__init__.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/tifffile/__pycache__/tifffile.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/imageio/plugins/__pycache__/_tifffile.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/igl/copyleft/pyigl_copyleft_core.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/IPython/core/__pycache__/interactiveshell.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/igl/triangle/pyigl_restricted_triangle.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/igl/copyleft/cgal/pyigl_copyleft_cgal.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/igl/embree/pyigl_embree.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/idna/__pycache__/idnadata.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/idna/__pycache__/uts46data.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/igl/copyleft/tetgen/pyigl_copyleft_tetgen.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/grpc/_cython/cygrpc.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/regex/__pycache__/test_regex.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/regex/__pycache__/_regex_core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/rich/__pycache__/_emoji_codes.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/distlib/t64.exe filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/distlib/t64-arm.exe filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/__pycache__/typing_extensions.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pyparsing/__pycache__/core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/PIL/__pycache__/Image.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/reshape.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/parsers.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/sas.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/testing.cpython-310-x86_64-linux-gnu.so filter=lfs 
diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/sparse.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslib.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/writers.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/algos.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/arrays.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/groupby.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/hashing.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/hashtable.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/index.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/internals.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/interval.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/join.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/lib.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/missing.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/ops.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/parsing.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/pandas/_libs/tslibs/period.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/strptime.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/timedeltas.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/timestamps.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/timezones.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/tzconversion.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/vectorized.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/conversion.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/dtypes.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/fields.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/nattype.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/np_datetime.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/window/aggregations.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/window/indexers.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/_libs/tslibs/offsets.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+lib/python3.10/site-packages/pandas/tests/indexing/__pycache__/test_loc.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/tests/frame/__pycache__/test_constructors.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/tests/io/__pycache__/test_sql.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/tests/tools/__pycache__/test_to_datetime.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/strings/__pycache__/accessor.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/indexes/__pycache__/base.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text +lib/python3.10/site-packages/pandas/core/indexes/__pycache__/multi.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..b1c1f891b53aebccfb4bae04dbcd53c048a7ce85 --- /dev/null +++ b/LICENSE @@ -0,0 +1,1598 @@ +End User License Agreement +-------------------------- + +NVIDIA Software License Agreement and CUDA Supplement to +Software License Agreement. Last updated: October 8, 2021 + +The CUDA Toolkit End User License Agreement applies to the +NVIDIA CUDA Toolkit, the NVIDIA CUDA Samples, the NVIDIA +Display Driver, NVIDIA Nsight tools (Visual Studio Edition), +and the associated documentation on CUDA APIs, programming +model and development tools. If you do not agree with the +terms and conditions of the license agreement, then do not +download or use the software. + +Last updated: October 8, 2021. + + +Preface +------- + +The Software License Agreement in Chapter 1 and the Supplement +in Chapter 2 contain license terms and conditions that govern +the use of NVIDIA software. By accepting this agreement, you +agree to comply with all the terms and conditions applicable +to the product(s) included herein. 
+ + +NVIDIA Driver + + +Description + +This package contains the operating system driver and +fundamental system software components for NVIDIA GPUs. + + +NVIDIA CUDA Toolkit + + +Description + +The NVIDIA CUDA Toolkit provides command-line and graphical +tools for building, debugging and optimizing the performance +of applications accelerated by NVIDIA GPUs, runtime and math +libraries, and documentation including programming guides, +user manuals, and API references. + + +Default Install Location of CUDA Toolkit + +Windows platform: + +%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.# + +Linux platform: + +/usr/local/cuda-#.# + +Mac platform: + +/Developer/NVIDIA/CUDA-#.# + + +NVIDIA CUDA Samples + + +Description + +CUDA Samples are now located in +https://github.com/nvidia/cuda-samples, which includes +instructions for obtaining, building, and running the samples. +They are no longer included in the CUDA toolkit. + + +NVIDIA Nsight Visual Studio Edition (Windows only) + + +Description + +NVIDIA Nsight Development Platform, Visual Studio Edition is a +development environment integrated into Microsoft Visual +Studio that provides tools for debugging, profiling, analyzing +and optimizing your GPU computing and graphics applications. + + +Default Install Location of Nsight Visual Studio Edition + +Windows platform: + +%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.# + + +1. License Agreement for NVIDIA Software Development Kits +--------------------------------------------------------- + + +Important Notice—Read before downloading, installing, +copying or using the licensed software: +------------------------------------------------------- + +This license agreement, including exhibits attached +(“Agreement”) is a legal agreement between you and NVIDIA +Corporation (“NVIDIA”) and governs your use of a NVIDIA +software development kit (“SDK”). 
+ +Each SDK has its own set of software and materials, but here +is a description of the types of items that may be included in +a SDK: source code, header files, APIs, data sets and assets +(examples include images, textures, models, scenes, videos, +native API input/output files), binary software, sample code, +libraries, utility programs, programming code and +documentation. + +This Agreement can be accepted only by an adult of legal age +of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company +or other legal entity, you represent that you have the legal +authority to bind the entity to this Agreement, in which case +“you” will mean the entity you represent. + +If you don’t have the required age or authority to accept +this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use +the SDK. + +You agree to use the SDK only for purposes that are permitted +by (a) this Agreement, and (b) any applicable law, regulation +or generally accepted practices or guidelines in the relevant +jurisdictions. + + +1.1. License + + +1.1.1. License Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants +you a non-exclusive, non-transferable license, without the +right to sublicense (except as expressly provided in this +Agreement) to: + + 1. Install and use the SDK, + + 2. Modify and create derivative works of sample source code + delivered in the SDK, and + + 3. Distribute those portions of the SDK that are identified + in this Agreement as distributable, as incorporated in + object code format into a software application that meets + the distribution requirements indicated in this Agreement. + + +1.1.2. Distribution Requirements + +These are the distribution requirements for you to exercise +the distribution grant: + + 1. Your application must have material additional + functionality, beyond the included portions of the SDK. + + 2. 
The distributable portions of the SDK shall only be + accessed by your application. + + 3. The following notice shall be included in modifications + and derivative works of sample source code distributed: + “This software contains source code provided by NVIDIA + Corporation.” + + 4. Unless a developer tool is identified in this Agreement + as distributable, it is delivered for your internal use + only. + + 5. The terms under which you distribute your application + must be consistent with the terms of this Agreement, + including (without limitation) terms relating to the + license grant and license restrictions and protection of + NVIDIA’s intellectual property rights. Additionally, you + agree that you will protect the privacy, security and + legal rights of your application users. + + 6. You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + +1.1.3. Authorized Users + +You may allow employees and contractors of your entity or of +your subsidiary(ies) to access and use the SDK from your +secure network to perform work on your behalf. + +If you are an academic institution you may allow users +enrolled or employed by the academic institution to access and +use the SDK from your secure network. + +You are responsible for the compliance with the terms of this +Agreement by your authorized users. If you become aware that +your authorized users didn’t follow the terms of this +Agreement, you agree to take reasonable steps to resolve the +non-compliance and prevent new occurrences. + + +1.1.4. 
Pre-Release SDK + +The SDK versions identified as alpha, beta, preview or +otherwise as pre-release, may not be fully functional, may +contain errors or design flaws, and may have reduced or +different security, privacy, accessibility, availability, and +reliability standards relative to commercial versions of +NVIDIA software and materials. Use of a pre-release SDK may +result in unexpected results, loss of data, project delays or +other unpredictable damage or loss. + +You may use a pre-release SDK at your own risk, understanding +that pre-release SDKs are not intended for use in production +or business-critical systems. + +NVIDIA may choose not to make available a commercial version +of any pre-release SDK. NVIDIA may also choose to abandon +development and terminate the availability of a pre-release +SDK at any time without liability. + + +1.1.5. Updates + +NVIDIA may, at its option, make available patches, workarounds +or other updates to this SDK. Unless the updates are provided +with their separate governing terms, they are deemed part of +the SDK licensed to you as provided in this Agreement. You +agree that the form and content of the SDK that NVIDIA +provides may change without prior notice to you. While NVIDIA +generally maintains compatibility between versions, NVIDIA may +in some cases make changes that introduce incompatibilities in +future versions of the SDK. + + +1.1.6. Components Under Other Licenses + +The SDK may come bundled with, or otherwise include or be +distributed with, NVIDIA or third-party components with +separate legal notices or terms as may be described in +proprietary notices accompanying the SDK. If and to the extent +there is a conflict between the terms in this Agreement and +the license terms associated with the component, the license +terms associated with the components control only to the +extent necessary to resolve the conflict. 
+ +Subject to the other terms of this Agreement, you may use the +SDK to develop and test applications released under Open +Source Initiative (OSI) approved open source software +licenses. + + +1.1.7. Reservation of Rights + +NVIDIA reserves all rights, title, and interest in and to the +SDK, not expressly granted to you under this Agreement. + + +1.2. Limitations + +The following license limitations apply to your use of the +SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. You acknowledge that the SDK as delivered is not tested + or certified by NVIDIA for use in connection with the + design, construction, maintenance, and/or operation of any + system where the use or failure of such system could + result in a situation that threatens the safety of human + life or results in catastrophic damages (each, a "Critical + Application"). 
Examples of Critical Applications include + use in avionics, navigation, autonomous vehicle + applications, AI solutions for automotive products, + military, medical, life support or other life critical + applications. NVIDIA shall not be liable to you or any + third party, in whole or in part, for any claims or + damages arising from such uses. You are solely responsible + for ensuring that any product or service developed with + the SDK as a whole includes sufficient features to comply + with all applicable legal and regulatory standards and + requirements. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to products or + services that use the SDK in or for Critical Applications, + and for use of the SDK outside of the scope of this + Agreement or not in compliance with its terms. + + 8. You may not reverse engineer, decompile or disassemble + any portion of the output generated using SDK elements for + the purpose of translating such output artifacts to target + a non-NVIDIA platform. + + +1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights under Section + 1.3.2. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. 
You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights under Section 1.3.1. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + +1.4. No Warranties + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL +FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND +ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND +OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE +ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO +WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF +DEALING OR COURSE OF TRADE. + + +1.5. 
Limitation of Liability + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS +AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, +PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS +OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF +PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION +WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, +WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH +OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF +LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES’ +TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS +AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE +NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS +LIMIT. + +These exclusions and limitations of liability shall apply +regardless if NVIDIA or its affiliates have been advised of +the possibility of such damages, and regardless of whether a +remedy fails its essential purpose. These exclusions and +limitations of liability form an essential basis of the +bargain between the parties, and, absent any of these +exclusions or limitations of liability, the provisions of this +Agreement, including, without limitation, the economic terms, +would be substantially different. + + +1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. 
(iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + +1.7. General + +If you wish to assign this Agreement or your rights and +obligations, including by merger, consolidation, dissolution +or operation of law, contact NVIDIA to ask for permission. Any +attempted assignment not approved by NVIDIA in writing shall +be void and of no effect. NVIDIA may assign, delegate or +transfer this Agreement and its rights and obligations, and if +to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this +Agreement. + +This Agreement will be governed in all respects by the laws of +the United States and of the State of Delaware as those laws +are applied to contracts entered into and performed entirely +within Delaware by Delaware residents, without regard to the +conflicts of laws principles. The United Nations Convention on +Contracts for the International Sale of Goods is specifically +disclaimed. You agree to all terms of this Agreement in the +English language. + +The state or federal courts residing in Santa Clara County, +California shall have exclusive jurisdiction over any dispute +or claim arising out of this Agreement. 
Notwithstanding this, +you agree that NVIDIA shall still be allowed to apply for +injunctive remedies or an equivalent type of urgent legal +relief in any jurisdiction. + +If any court of competent jurisdiction determines that any +provision of this Agreement is illegal, invalid or +unenforceable, such provision will be construed as limited to +the extent necessary to be consistent with and fully +enforceable under the law and the remaining provisions will +remain in full force and effect. Unless otherwise specified, +remedies are cumulative. + +Each party acknowledges and agrees that the other is an +independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is +“commercial items” consisting of “commercial computer +software” and “commercial computer software +documentation” provided with RESTRICTED RIGHTS. Use, +duplication or disclosure by the U.S. Government or a U.S. +Government subcontractor is subject to the restrictions in +this Agreement pursuant to DFARS 227.7202-3(a) or as set forth +in subparagraphs (c)(1) and (2) of the Commercial Computer +Software - Restricted Rights clause at FAR 52.227-19, as +applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas +Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and +regulations. You agree that you will not ship, transfer or +export the SDK into any country, or use the SDK in any manner, +prohibited by the United States Bureau of Industry and +Security or economic sanctions regulations administered by the +U.S. Department of Treasury’s Office of Foreign Assets +Control (OFAC), or any applicable export laws, restrictions or +regulations. These laws include restrictions on destinations, +end users and end use. By accepting this Agreement, you +confirm that you are not a resident or citizen of any country +currently embargoed by the U.S. and that you are not otherwise +prohibited from receiving the SDK. 
+ +Any notice delivered by NVIDIA to you under this Agreement +will be delivered via mail, email or fax. You agree that any +notices that NVIDIA sends you electronically will satisfy any +legal communication requirements. Please direct your legal +notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United +States of America, Attention: Legal Department. + +This Agreement and any exhibits incorporated into this +Agreement constitute the entire agreement of the parties with +respect to the subject matter of this Agreement and supersede +all prior negotiations or documentation exchanged between the +parties relating to this SDK license. Any additional and/or +conflicting terms on documents issued by you are null, void, +and invalid. Any amendment or waiver under this Agreement +shall be in writing and signed by representatives of both +parties. + + +2. CUDA Toolkit Supplement to Software License Agreement for +NVIDIA Software Development Kits +------------------------------------------------------------ + +The terms in this supplement govern your use of the NVIDIA +CUDA Toolkit SDK under the terms of your license agreement +(“Agreement”) as modified by this supplement. Capitalized +terms used but not defined below have the meaning assigned to +them in the Agreement. + +This supplement is an exhibit to the Agreement and is +incorporated as an integral part of the Agreement. In the +event of conflict between the terms in this supplement and the +terms in the Agreement, the terms in this supplement govern. + + +2.1. License Scope + +The SDK is licensed for you to develop applications only for +use in systems with NVIDIA GPUs. + + +2.2. Distribution + +The portions of the SDK that are distributable under the +Agreement are listed in Attachment A. + + +2.3. 
Operating Systems + +Those portions of the SDK designed exclusively for use on the +Linux or FreeBSD operating systems, or other operating systems +derived from the source code to these operating systems, may +be copied and redistributed for use in accordance with this +Agreement, provided that the object code files are not +modified in any way (except for unzipping of compressed +files). + + +2.4. Audio and Video Encoders and Decoders + +You acknowledge and agree that it is your sole responsibility +to obtain any additional third-party licenses required to +make, have made, use, have used, sell, import, and offer for +sale your products or services that include or incorporate any +third-party software and content relating to audio and/or +video encoders and decoders from, including but not limited +to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., +MPEG-LA, and Coding Technologies. NVIDIA does not grant to you +under this Agreement any necessary patent or other rights with +respect to any audio and/or video encoders and decoders. + + +2.5. Licensing + +If the distribution terms in this Agreement are not suitable +for your organization, or for any questions regarding this +Agreement, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + + +2.6. Attachment A + +The following CUDA Toolkit files may be distributed with +Licensee Applications developed by you, including certain +variations of these files that have version number or +architecture specific information embedded in the file name - +as an example only, for release version 9.0 of the 64-bit +Windows software, the file cudart64_90.dll is redistributable. 
+ +Component + +CUDA Runtime + +Windows + +cudart.dll, cudart_static.lib, cudadevrt.lib + +Mac OSX + +libcudart.dylib, libcudart_static.a, libcudadevrt.a + +Linux + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Android + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Component + +CUDA FFT Library + +Windows + +cufft.dll, cufftw.dll, cufft.lib, cufftw.lib + +Mac OSX + +libcufft.dylib, libcufft_static.a, libcufftw.dylib, +libcufftw_static.a + +Linux + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Android + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Component + +CUDA BLAS Library + +Windows + +cublas.dll, cublasLt.dll + +Mac OSX + +libcublas.dylib, libcublasLt.dylib, libcublas_static.a, +libcublasLt_static.a + +Linux + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Android + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Component + +NVIDIA "Drop-in" BLAS Library + +Windows + +nvblas.dll + +Mac OSX + +libnvblas.dylib + +Linux + +libnvblas.so + +Component + +CUDA Sparse Matrix Library + +Windows + +cusparse.dll, cusparse.lib + +Mac OSX + +libcusparse.dylib, libcusparse_static.a + +Linux + +libcusparse.so, libcusparse_static.a + +Android + +libcusparse.so, libcusparse_static.a + +Component + +CUDA Linear Solver Library + +Windows + +cusolver.dll, cusolver.lib + +Mac OSX + +libcusolver.dylib, libcusolver_static.a + +Linux + +libcusolver.so, libcusolver_static.a + +Android + +libcusolver.so, libcusolver_static.a + +Component + +CUDA Random Number Generation Library + +Windows + +curand.dll, curand.lib + +Mac OSX + +libcurand.dylib, libcurand_static.a + +Linux + +libcurand.so, libcurand_static.a + +Android + +libcurand.so, libcurand_static.a + +Component + +NVIDIA Performance Primitives Library + +Windows + +nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll, +nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll, +nppidei.lib, nppif.dll, nppif.lib, 
nppig.dll, nppig.lib, +nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll, +nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib + +Mac OSX + +libnppc.dylib, libnppc_static.a, libnppial.dylib, +libnppial_static.a, libnppicc.dylib, libnppicc_static.a, +libnppicom.dylib, libnppicom_static.a, libnppidei.dylib, +libnppidei_static.a, libnppif.dylib, libnppif_static.a, +libnppig.dylib, libnppig_static.a, libnppim.dylib, +libnppisu_static.a, libnppitc.dylib, libnppitc_static.a, +libnpps.dylib, libnpps_static.a + +Linux + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a, +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so, +libnppitc_static.a, libnpps.so, libnpps_static.a + +Android + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a, +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so, +libnppitc_static.a, libnpps.so, libnpps_static.a + +Component + +NVIDIA JPEG Library + +Windows + +nvjpeg.lib, nvjpeg.dll + +Linux + +libnvjpeg.so, libnvjpeg_static.a + +Component + +Internal common library required for statically linking to +cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP + +Mac OSX + +libculibos.a + +Linux + +libculibos.a + +Component + +NVIDIA Runtime Compilation Library and Header + +All + +nvrtc.h + +Windows + +nvrtc.dll, nvrtc-builtins.dll + +Mac OSX + +libnvrtc.dylib, libnvrtc-builtins.dylib + +Linux + +libnvrtc.so, libnvrtc-builtins.so + +Component + +NVIDIA Optimizing Compiler Library + +Windows + +nvvm.dll + +Mac OSX + +libnvvm.dylib + +Linux + +libnvvm.so + 
+Component + +NVIDIA Common Device Math Functions Library + +Windows + +libdevice.10.bc + +Mac OSX + +libdevice.10.bc + +Linux + +libdevice.10.bc + +Component + +CUDA Occupancy Calculation Header Library + +All + +cuda_occupancy.h + +Component + +CUDA Half Precision Headers + +All + +cuda_fp16.h, cuda_fp16.hpp + +Component + +CUDA Profiling Tools Interface (CUPTI) Library + +Windows + +cupti.dll + +Mac OSX + +libcupti.dylib + +Linux + +libcupti.so + +Component + +NVIDIA Tools Extension Library + +Windows + +nvToolsExt.dll, nvToolsExt.lib + +Mac OSX + +libnvToolsExt.dylib + +Linux + +libnvToolsExt.so + +Component + +NVIDIA CUDA Driver Libraries + +Linux + +libcuda.so, libnvidia-ptxjitcompiler.so + +Component + +NVIDIA CUDA File IO Libraries and Header + +All + +cufile.h + +Linux + +libcufile.so, libcufile_rdma.so, libcufile_static.a, +libcufile_rdma_static.a + +The NVIDIA CUDA Driver Libraries are only distributable in +applications that meet these criteria: + + 1. The application was developed starting from a NVIDIA CUDA + container obtained from Docker Hub or the NVIDIA GPU + Cloud, and + + 2. The resulting application is packaged as a Docker + container and distributed to users on Docker Hub or the + NVIDIA GPU Cloud only. + +In addition to the rights above, for parties that are +developing software intended solely for use on Jetson +development kits or Jetson modules, and running Linux for +Tegra software, the following shall apply: + + * The SDK may be distributed in its entirety, as provided by + NVIDIA, and without separation of its components, for you + and/or your licensees to create software development kits + for use only on the Jetson platform and running Linux for + Tegra software. + + +2.7. Attachment B + + +Additional Licensing Obligations + +The following third party components included in the SOFTWARE +are licensed to Licensee pursuant to the following terms and +conditions: + + 1. 
Licensee's use of the GDB third party component is + subject to the terms and conditions of GNU GPL v3: + + This product includes copyrighted third-party software licensed + under the terms of the GNU General Public License v3 ("GPL v3"). + All third-party software packages are copyright by their respective + authors. GPL v3 terms and conditions are hereby incorporated into + the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt + + Consistent with these licensing requirements, the software + listed below is provided under the terms of the specified + open source software licenses. To obtain source code for + software provided under licenses that require + redistribution of source code, including the GNU General + Public License (GPL) and GNU Lesser General Public License + (LGPL), contact oss-requests@nvidia.com. This offer is + valid for a period of three (3) years from the date of the + distribution of this product by NVIDIA CORPORATION. + + Component License + CUDA-GDB GPL v3 + + 2. Licensee represents and warrants that any and all third + party licensing and/or royalty payment obligations in + connection with Licensee's use of the H.264 video codecs + are solely the responsibility of Licensee. + + 3. Licensee's use of the Thrust library is subject to the + terms and conditions of the Apache License Version 2.0. + All third-party software packages are copyright by their + respective authors. Apache License Version 2.0 terms and + conditions are hereby incorporated into the Agreement by + this reference. + http://www.apache.org/licenses/LICENSE-2.0.html + + In addition, Licensee acknowledges the following notice: + Thrust includes source code from the Boost Iterator, + Tuple, System, and Random Number libraries. + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . 
+ + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 4. Licensee's use of the LLVM third party component is + subject to the following terms and conditions: + + ====================================================== + LLVM Release License + ====================================================== + University of Illinois/NCSA + Open Source License + + Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. + All rights reserved. 
+ + Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at Urbana- + Champaign, nor the names of its contributors may be used to endorse or + promote products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + + 5. Licensee's use of the PCRE third party component is + subject to the following terms and conditions: + + ------------ + PCRE LICENCE + ------------ + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. 
+ Release 8 of PCRE is distributed under the terms of the "BSD" licence, as + specified below. The documentation for PCRE, supplied in the "doc" + directory, is distributed under the same terms as the software itself. The + basic library functions are written in C and are freestanding. Also + included in the distribution is a set of C++ wrapper functions, and a just- + in-time compiler that can be used to optimize pattern matching. These are + both optional features that can be omitted when the library is built. + + THE BASIC LIBRARY FUNCTIONS + --------------------------- + Written by: Philip Hazel + Email local part: ph10 + Email domain: cam.ac.uk + University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2012 University of Cambridge + All rights reserved. + + PCRE JUST-IN-TIME COMPILATION SUPPORT + ------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu + Copyright(c) 2010-2012 Zoltan Herczeg + All rights reserved. + + STACK-LESS JUST-IN-TIME COMPILER + -------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu + Copyright(c) 2009-2012 Zoltan Herczeg + All rights reserved. + + THE C++ WRAPPER FUNCTIONS + ------------------------- + Contributed by: Google Inc. + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + + THE "BSD" LICENCE + ----------------- + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 6. Some of the cuBLAS library routines were written by or + derived from code written by Vasily Volkov and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2007-2009, Regents of the University of California + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. 
+ * Neither the name of the University of California, Berkeley nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 7. Some of the cuBLAS library routines were written by or + derived from code written by Davide Barbieri and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 8. Some of the cuBLAS library routines were derived from + code developed by the University of Tennessee and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2010 The University of Tennessee. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer listed in this license in the documentation and/or + other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 9. Some of the cuBLAS library routines were written by or + derived from code written by Jonathan Hogg and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2012, The Science and Technology Facilities Council (STFC). + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the STFC nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE STFC BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 10. Some of the cuBLAS library routines were written by or + derived from code written by Ahmad M. Abdelfattah, David + Keyes, and Hatem Ltaief, and are subject to the Apache + License, Version 2.0, as follows: + + -- (C) Copyright 2013 King Abdullah University of Science and Technology + Authors: + Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) + David Keyes (david.keyes@kaust.edu.sa) + Hatem Ltaief (hatem.ltaief@kaust.edu.sa) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the King Abdullah University of Science and + Technology nor the names of its contributors may be used to endorse + or promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE + + 11. Some of the cuSPARSE library routines were written by or + derived from code written by Li-Wen Chang and are subject + to the NCSA Open Source License as follows: + + Copyright (c) 2012, University of Illinois. + + All rights reserved. + + Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal with the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimers in the documentation and/or other materials provided + with the distribution. + * Neither the names of IMPACT Group, University of Illinois, nor + the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior + written permission. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + 12. Some of the cuRAND library routines were written by or + derived from code written by Mutsuo Saito and Makoto + Matsumoto and are subject to the following license: + + Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima + University. All rights reserved. + + Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima + University and University of Tokyo. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 13. Some of the cuRAND library routines were derived from + code developed by D. E. Shaw Research and are subject to + the following license: + + Copyright 2010-2011, D. E. Shaw Research. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 14. Some of the Math library routines were written by or + derived from code developed by Norbert Juffa and are + subject to the following license: + + Copyright (c) 2015-2017, Norbert Juffa + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 15. Licensee's use of the lz4 third party component is + subject to the following terms and conditions: + + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 16. The NPP library uses code from the Boost Math Toolkit, + and is subject to the following license: + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 17. Portions of the Nsight Eclipse Edition is subject to the + following license: + + The Eclipse Foundation makes available all content in this plug-in + ("Content"). Unless otherwise indicated below, the Content is provided + to you under the terms and conditions of the Eclipse Public License + Version 1.0 ("EPL"). A copy of the EPL is available at http:// + www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program" + will mean the Content. + + If you did not receive this Content directly from the Eclipse + Foundation, the Content is being redistributed by another party + ("Redistributor") and different terms and conditions may apply to your + use of any object code in the Content. Check the Redistributor's + license that was provided with the Content. If no such license exists, + contact the Redistributor. Unless otherwise indicated below, the terms + and conditions of the EPL still apply to any source code in the + Content and such source code may be obtained at http://www.eclipse.org. + + 18. Some of the cuBLAS library routines uses code from + OpenAI, which is subject to the following license: + + License URL + https://github.com/openai/openai-gemm/blob/master/LICENSE + + License Text + The MIT License + + Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + 19. Licensee's use of the Visual Studio Setup Configuration + Samples is subject to the following license: + + The MIT License (MIT) + Copyright (C) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + 20. Licensee's use of linmath.h header for CPU functions for + GL vector/matrix operations from lunarG is subject to the + Apache License Version 2.0. + + 21. The DX12-CUDA sample uses the d3dx12.h header, which is + subject to the MIT license . + +----------------- diff --git a/bin/2to3 b/bin/2to3 new file mode 100644 index 0000000000000000000000000000000000000000..71d77f225d099a4ef703c125c55c9ed7d29310d2 --- /dev/null +++ b/bin/2to3 @@ -0,0 +1,5 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +import sys +from lib2to3.main import main + +sys.exit(main("lib2to3.fixes")) diff --git a/bin/2to3-3.10 b/bin/2to3-3.10 new file mode 100644 index 0000000000000000000000000000000000000000..71d77f225d099a4ef703c125c55c9ed7d29310d2 --- /dev/null +++ b/bin/2to3-3.10 @@ -0,0 +1,5 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +import sys +from lib2to3.main import main + +sys.exit(main("lib2to3.fixes")) diff --git a/bin/asn1Coding b/bin/asn1Coding new file mode 100644 index 0000000000000000000000000000000000000000..f1acbf6b88d8574b835d983c02e69c8f4fe34c46 Binary files /dev/null and b/bin/asn1Coding differ diff --git a/bin/asn1Decoding b/bin/asn1Decoding new file mode 100644 index 0000000000000000000000000000000000000000..c76bc439208f04b0f845e78add89c918e66a5706 Binary files /dev/null and b/bin/asn1Decoding differ diff --git a/bin/asn1Parser b/bin/asn1Parser new file mode 100644 index 
0000000000000000000000000000000000000000..3d25df949a99e2bdef55e4b31628ce1890b6f7de Binary files /dev/null and b/bin/asn1Parser differ diff --git a/bin/bzcmp b/bin/bzcmp new file mode 100644 index 0000000000000000000000000000000000000000..bd96c27c39871d6802675b182f7cf4481bec96ca --- /dev/null +++ b/bin/bzcmp @@ -0,0 +1,76 @@ +#!/bin/sh +# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh + +# Bzcmp/diff wrapped for bzip2, +# adapted from zdiff by Philippe Troin for Debian GNU/Linux. + +# Bzcmp and bzdiff are used to invoke the cmp or the diff pro- +# gram on compressed files. All options specified are passed +# directly to cmp or diff. If only 1 file is specified, then +# the files compared are file1 and an uncompressed file1.bz2. +# If two files are specified, then they are uncompressed (if +# necessary) and fed to cmp or diff. The exit status from cmp +# or diff is preserved. + +PATH="/usr/bin:/bin:$PATH"; export PATH +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *cmp) comp=${CMP-cmp} ;; + *) comp=${DIFF-diff} ;; +esac + +OPTIONS= +FILES= +for ARG +do + case "$ARG" in + -*) OPTIONS="$OPTIONS $ARG";; + *) if test -f "$ARG"; then + FILES="$FILES $ARG" + else + echo "${prog}: $ARG not found or not a regular file" + exit 1 + fi ;; + esac +done +if test -z "$FILES"; then + echo "Usage: $prog [${comp}_options] file [file]" + exit 1 +fi +set $FILES +if test $# -eq 1; then + FILE=`echo "$1" | sed 's/.bz2$//'` + bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE" + STAT="$?" + +elif test $# -eq 2; then + case "$1" in + *.bz2) + case "$2" in + *.bz2) + F=`echo "$2" | sed 's|.*/||;s|.bz2$||'` + tmp=`mktemp "${TMPDIR:-/tmp}"/bzdiff.XXXXXXXXXX` || { + echo 'cannot create a temporary file' >&2 + exit 1 + } + bzip2 -cdfq "$2" > "$tmp" + bzip2 -cdfq "$1" | $comp $OPTIONS - "$tmp" + STAT="$?" 
+ /bin/rm -f "$tmp";; + + *) bzip2 -cdfq "$1" | $comp $OPTIONS - "$2" + STAT="$?";; + esac;; + *) case "$2" in + *.bz2) + bzip2 -cdfq "$2" | $comp $OPTIONS "$1" - + STAT="$?";; + *) $comp $OPTIONS "$1" "$2" + STAT="$?";; + esac;; + esac +else + echo "Usage: $prog [${comp}_options] file [file]" + exit 1 +fi +exit "$STAT" diff --git a/bin/bzdiff b/bin/bzdiff new file mode 100644 index 0000000000000000000000000000000000000000..bd96c27c39871d6802675b182f7cf4481bec96ca --- /dev/null +++ b/bin/bzdiff @@ -0,0 +1,76 @@ +#!/bin/sh +# sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh + +# Bzcmp/diff wrapped for bzip2, +# adapted from zdiff by Philippe Troin for Debian GNU/Linux. + +# Bzcmp and bzdiff are used to invoke the cmp or the diff pro- +# gram on compressed files. All options specified are passed +# directly to cmp or diff. If only 1 file is specified, then +# the files compared are file1 and an uncompressed file1.bz2. +# If two files are specified, then they are uncompressed (if +# necessary) and fed to cmp or diff. The exit status from cmp +# or diff is preserved. + +PATH="/usr/bin:/bin:$PATH"; export PATH +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *cmp) comp=${CMP-cmp} ;; + *) comp=${DIFF-diff} ;; +esac + +OPTIONS= +FILES= +for ARG +do + case "$ARG" in + -*) OPTIONS="$OPTIONS $ARG";; + *) if test -f "$ARG"; then + FILES="$FILES $ARG" + else + echo "${prog}: $ARG not found or not a regular file" + exit 1 + fi ;; + esac +done +if test -z "$FILES"; then + echo "Usage: $prog [${comp}_options] file [file]" + exit 1 +fi +set $FILES +if test $# -eq 1; then + FILE=`echo "$1" | sed 's/.bz2$//'` + bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE" + STAT="$?" 
+ +elif test $# -eq 2; then + case "$1" in + *.bz2) + case "$2" in + *.bz2) + F=`echo "$2" | sed 's|.*/||;s|.bz2$||'` + tmp=`mktemp "${TMPDIR:-/tmp}"/bzdiff.XXXXXXXXXX` || { + echo 'cannot create a temporary file' >&2 + exit 1 + } + bzip2 -cdfq "$2" > "$tmp" + bzip2 -cdfq "$1" | $comp $OPTIONS - "$tmp" + STAT="$?" + /bin/rm -f "$tmp";; + + *) bzip2 -cdfq "$1" | $comp $OPTIONS - "$2" + STAT="$?";; + esac;; + *) case "$2" in + *.bz2) + bzip2 -cdfq "$2" | $comp $OPTIONS "$1" - + STAT="$?";; + *) $comp $OPTIONS "$1" "$2" + STAT="$?";; + esac;; + esac +else + echo "Usage: $prog [${comp}_options] file [file]" + exit 1 +fi +exit "$STAT" diff --git a/bin/bzegrep b/bin/bzegrep new file mode 100644 index 0000000000000000000000000000000000000000..0314ca6038b03a9d36d5b5f877cab0cdbc2425e1 --- /dev/null +++ b/bin/bzegrep @@ -0,0 +1,85 @@ +#!/bin/sh + +# Bzgrep wrapped for bzip2, +# adapted from zgrep by Philippe Troin for Debian GNU/Linux. +## zgrep notice: +## zgrep -- a wrapper around a grep program that decompresses files as needed +## Adapted from a version sent by Charles Levert + +PATH="/usr/bin:$PATH"; export PATH + +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *egrep) grep=${EGREP-egrep} ;; + *fgrep) grep=${FGREP-fgrep} ;; + *) grep=${GREP-grep} ;; +esac +pat="" +while test $# -ne 0; do + case "$1" in + -e | -f) opt="$opt $1"; shift; pat="$1" + if test "$grep" = grep; then # grep is buggy with -e on SVR4 + grep=egrep + fi;; + -A | -B) opt="$opt $1 $2"; shift;; + -*) opt="$opt $1";; + *) if test -z "$pat"; then + pat="$1" + else + break; + fi;; + esac + shift +done + +if test -z "$pat"; then + echo "grep through bzip2 files" + echo "usage: $prog [grep_options] pattern [files]" + exit 1 +fi + +list=0 +silent=0 +op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'` +case "$op" in + *l*) list=1 +esac +case "$op" in + *h*) silent=1 +esac + +if test $# -eq 0; then + bzip2 -cdfq | $grep $opt "$pat" + exit $? 
+fi + +res=0 +for i do + if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi + if test $list -eq 1; then + bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i + r=$? + elif test $# -eq 1 -o $silent -eq 1; then + bzip2 -cdfq "$i" | $grep $opt "$pat" + r=$? + else + j=$(echo "$i" | sed 's/\\/&&/g;s/|/\\&/g;s/&/\\&/g') + j=`printf "%s" "$j" | tr '\n' ' '` + # A trick adapted from + # https://groups.google.com/forum/#!original/comp.unix.shell/x1345iu10eg/Nn1n-1r1uU0J + # that has the same effect as the following bash code: + # bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${j}:|" + # r=${PIPESTATUS[1]} + exec 3>&1 + eval ` + exec 4>&1 >&3 3>&- + { + bzip2 -cdfq "$i" 4>&- + } | { + $grep $opt "$pat" 4>&-; echo "r=$?;" >&4 + } | sed "s|^|${j}:|" + ` + fi + test "$r" -ne 0 && res="$r" +done +exit $res diff --git a/bin/bzfgrep b/bin/bzfgrep new file mode 100644 index 0000000000000000000000000000000000000000..0314ca6038b03a9d36d5b5f877cab0cdbc2425e1 --- /dev/null +++ b/bin/bzfgrep @@ -0,0 +1,85 @@ +#!/bin/sh + +# Bzgrep wrapped for bzip2, +# adapted from zgrep by Philippe Troin for Debian GNU/Linux. 
+## zgrep notice: +## zgrep -- a wrapper around a grep program that decompresses files as needed +## Adapted from a version sent by Charles Levert + +PATH="/usr/bin:$PATH"; export PATH + +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *egrep) grep=${EGREP-egrep} ;; + *fgrep) grep=${FGREP-fgrep} ;; + *) grep=${GREP-grep} ;; +esac +pat="" +while test $# -ne 0; do + case "$1" in + -e | -f) opt="$opt $1"; shift; pat="$1" + if test "$grep" = grep; then # grep is buggy with -e on SVR4 + grep=egrep + fi;; + -A | -B) opt="$opt $1 $2"; shift;; + -*) opt="$opt $1";; + *) if test -z "$pat"; then + pat="$1" + else + break; + fi;; + esac + shift +done + +if test -z "$pat"; then + echo "grep through bzip2 files" + echo "usage: $prog [grep_options] pattern [files]" + exit 1 +fi + +list=0 +silent=0 +op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'` +case "$op" in + *l*) list=1 +esac +case "$op" in + *h*) silent=1 +esac + +if test $# -eq 0; then + bzip2 -cdfq | $grep $opt "$pat" + exit $? +fi + +res=0 +for i do + if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi + if test $list -eq 1; then + bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i + r=$? + elif test $# -eq 1 -o $silent -eq 1; then + bzip2 -cdfq "$i" | $grep $opt "$pat" + r=$? 
+ else + j=$(echo "$i" | sed 's/\\/&&/g;s/|/\\&/g;s/&/\\&/g') + j=`printf "%s" "$j" | tr '\n' ' '` + # A trick adapted from + # https://groups.google.com/forum/#!original/comp.unix.shell/x1345iu10eg/Nn1n-1r1uU0J + # that has the same effect as the following bash code: + # bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${j}:|" + # r=${PIPESTATUS[1]} + exec 3>&1 + eval ` + exec 4>&1 >&3 3>&- + { + bzip2 -cdfq "$i" 4>&- + } | { + $grep $opt "$pat" 4>&-; echo "r=$?;" >&4 + } | sed "s|^|${j}:|" + ` + fi + test "$r" -ne 0 && res="$r" +done +exit $res diff --git a/bin/bzgrep b/bin/bzgrep new file mode 100644 index 0000000000000000000000000000000000000000..0314ca6038b03a9d36d5b5f877cab0cdbc2425e1 --- /dev/null +++ b/bin/bzgrep @@ -0,0 +1,85 @@ +#!/bin/sh + +# Bzgrep wrapped for bzip2, +# adapted from zgrep by Philippe Troin for Debian GNU/Linux. +## zgrep notice: +## zgrep -- a wrapper around a grep program that decompresses files as needed +## Adapted from a version sent by Charles Levert + +PATH="/usr/bin:$PATH"; export PATH + +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *egrep) grep=${EGREP-egrep} ;; + *fgrep) grep=${FGREP-fgrep} ;; + *) grep=${GREP-grep} ;; +esac +pat="" +while test $# -ne 0; do + case "$1" in + -e | -f) opt="$opt $1"; shift; pat="$1" + if test "$grep" = grep; then # grep is buggy with -e on SVR4 + grep=egrep + fi;; + -A | -B) opt="$opt $1 $2"; shift;; + -*) opt="$opt $1";; + *) if test -z "$pat"; then + pat="$1" + else + break; + fi;; + esac + shift +done + +if test -z "$pat"; then + echo "grep through bzip2 files" + echo "usage: $prog [grep_options] pattern [files]" + exit 1 +fi + +list=0 +silent=0 +op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'` +case "$op" in + *l*) list=1 +esac +case "$op" in + *h*) silent=1 +esac + +if test $# -eq 0; then + bzip2 -cdfq | $grep $opt "$pat" + exit $? 
+fi + +res=0 +for i do + if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi + if test $list -eq 1; then + bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i + r=$? + elif test $# -eq 1 -o $silent -eq 1; then + bzip2 -cdfq "$i" | $grep $opt "$pat" + r=$? + else + j=$(echo "$i" | sed 's/\\/&&/g;s/|/\\&/g;s/&/\\&/g') + j=`printf "%s" "$j" | tr '\n' ' '` + # A trick adapted from + # https://groups.google.com/forum/#!original/comp.unix.shell/x1345iu10eg/Nn1n-1r1uU0J + # that has the same effect as the following bash code: + # bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${j}:|" + # r=${PIPESTATUS[1]} + exec 3>&1 + eval ` + exec 4>&1 >&3 3>&- + { + bzip2 -cdfq "$i" 4>&- + } | { + $grep $opt "$pat" 4>&-; echo "r=$?;" >&4 + } | sed "s|^|${j}:|" + ` + fi + test "$r" -ne 0 && res="$r" +done +exit $res diff --git a/bin/bzip2recover b/bin/bzip2recover new file mode 100644 index 0000000000000000000000000000000000000000..6d9fb9e02c944c3055ac4801f6d7e3cd638062da Binary files /dev/null and b/bin/bzip2recover differ diff --git a/bin/bzless b/bin/bzless new file mode 100644 index 0000000000000000000000000000000000000000..d314043404976e58329f611c195d5c49eb917655 --- /dev/null +++ b/bin/bzless @@ -0,0 +1,61 @@ +#!/bin/sh + +# Bzmore wrapped for bzip2, +# adapted from zmore by Philippe Troin for Debian GNU/Linux. + +PATH="/usr/bin:$PATH"; export PATH + +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *less) more=less ;; + *) more=more ;; +esac + +if test "`echo -n a`" = "-n a"; then + # looks like a SysV system: + n1=''; n2='\c' +else + n1='-n'; n2='' +fi +oldtty=`stty -g 2>/dev/null` +if stty -cbreak 2>/dev/null; then + cb='cbreak'; ncb='-cbreak' +else + # 'stty min 1' resets eof to ^a on both SunOS and SysV! + cb='min 1 -icanon'; ncb='icanon eof ^d' +fi +if test $? 
-eq 0 -a -n "$oldtty"; then + trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 +else + trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 +fi + +if test $# = 0; then + if test -t 0; then + echo usage: $prog files... + else + bzip2 -cdfq | eval $more + fi +else + FIRST=1 + for FILE + do + if test $FIRST -eq 0; then + echo $n1 "--More--(Next file: $FILE)$n2" + stty $cb -echo 2>/dev/null + ANS=`dd bs=1 count=1 2>/dev/null` + stty $ncb echo 2>/dev/null + echo " " + if test "$ANS" = 'e' -o "$ANS" = 'q'; then + exit + fi + fi + if test "$ANS" != 's'; then + echo "------> $FILE <------" + bzip2 -cdfq "$FILE" | eval $more + fi + if test -t; then + FIRST=0 + fi + done +fi diff --git a/bin/bzmore b/bin/bzmore new file mode 100644 index 0000000000000000000000000000000000000000..d314043404976e58329f611c195d5c49eb917655 --- /dev/null +++ b/bin/bzmore @@ -0,0 +1,61 @@ +#!/bin/sh + +# Bzmore wrapped for bzip2, +# adapted from zmore by Philippe Troin for Debian GNU/Linux. + +PATH="/usr/bin:$PATH"; export PATH + +prog=`echo $0 | sed 's|.*/||'` +case "$prog" in + *less) more=less ;; + *) more=more ;; +esac + +if test "`echo -n a`" = "-n a"; then + # looks like a SysV system: + n1=''; n2='\c' +else + n1='-n'; n2='' +fi +oldtty=`stty -g 2>/dev/null` +if stty -cbreak 2>/dev/null; then + cb='cbreak'; ncb='-cbreak' +else + # 'stty min 1' resets eof to ^a on both SunOS and SysV! + cb='min 1 -icanon'; ncb='icanon eof ^d' +fi +if test $? -eq 0 -a -n "$oldtty"; then + trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 +else + trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 +fi + +if test $# = 0; then + if test -t 0; then + echo usage: $prog files... 
+ else + bzip2 -cdfq | eval $more + fi +else + FIRST=1 + for FILE + do + if test $FIRST -eq 0; then + echo $n1 "--More--(Next file: $FILE)$n2" + stty $cb -echo 2>/dev/null + ANS=`dd bs=1 count=1 2>/dev/null` + stty $ncb echo 2>/dev/null + echo " " + if test "$ANS" = 'e' -o "$ANS" = 'q'; then + exit + fi + fi + if test "$ANS" != 's'; then + echo "------> $FILE <------" + bzip2 -cdfq "$FILE" | eval $more + fi + if test -t; then + FIRST=0 + fi + done +fi diff --git a/bin/c_rehash b/bin/c_rehash new file mode 100644 index 0000000000000000000000000000000000000000..4a928e32395a464616f1babf289327532d43285c --- /dev/null +++ b/bin/c_rehash @@ -0,0 +1,252 @@ +#!/usr/bin/env perl + +# WARNING: do not edit! +# Generated by Makefile from tools/c_rehash.in +# Copyright 1999-2025 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# Perl c_rehash script, scan all files in a directory +# and add symbolic links to their hash values. + +my $dir = ""; +my $prefix = "/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe"; + +my $errorcount = 0; +my $openssl = $ENV{OPENSSL} || "openssl"; +my $pwd; +my $x509hash = "-subject_hash"; +my $crlhash = "-hash"; +my $verbose = 0; +my $symlink_exists=eval {symlink("",""); 1}; +my $removelinks = 1; + +## Parse flags. 
+while ( $ARGV[0] =~ /^-/ ) { + my $flag = shift @ARGV; + last if ( $flag eq '--'); + if ( $flag eq '-old') { + $x509hash = "-subject_hash_old"; + $crlhash = "-hash_old"; + } elsif ( $flag eq '-h' || $flag eq '-help' ) { + help(); + } elsif ( $flag eq '-n' ) { + $removelinks = 0; + } elsif ( $flag eq '-v' ) { + $verbose++; + } + else { + print STDERR "Usage error; try -h.\n"; + exit 1; + } +} + +sub help { + print "Usage: c_rehash [-old] [-h] [-help] [-v] [dirs...]\n"; + print " -old use old-style digest\n"; + print " -h or -help print this help text\n"; + print " -v print files removed and linked\n"; + exit 0; +} + +eval "require Cwd"; +if (defined(&Cwd::getcwd)) { + $pwd=Cwd::getcwd(); +} else { + $pwd=`pwd`; + chomp($pwd); +} + +# DOS/Win32 or Unix delimiter? Prefix our installdir, then search. +my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; +$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); + +if (!(-f $openssl && -x $openssl)) { + my $found = 0; + foreach (split /$path_delim/, $ENV{PATH}) { + if (-f "$_/$openssl" && -x "$_/$openssl") { + $found = 1; + $openssl = "$_/$openssl"; + last; + } + } + if ($found == 0) { + print STDERR "c_rehash: rehashing skipped ('openssl' program not available)\n"; + exit 0; + } +} + +if (@ARGV) { + @dirlist = @ARGV; +} elsif ($ENV{SSL_CERT_DIR}) { + @dirlist = split /$path_delim/, $ENV{SSL_CERT_DIR}; +} else { + $dirlist[0] = "$dir/certs"; +} + +if (-d $dirlist[0]) { + chdir $dirlist[0]; + $openssl="$pwd/$openssl" if (!(-f $openssl && -x $openssl)); + chdir $pwd; +} + +foreach (@dirlist) { + if (-d $_ ) { + if ( -w $_) { + hash_dir($_); + } else { + print "Skipping $_, can't write\n"; + $errorcount++; + } + } +} +exit($errorcount); + +sub copy_file { + my ($src_fname, $dst_fname) = @_; + + if (open(my $in, "<", $src_fname)) { + if (open(my $out, ">", $dst_fname)) { + print $out $_ while (<$in>); + close $out; + } else { + warn "Cannot open $dst_fname for write, $!"; + } + close $in; + } else { + warn 
"Cannot open $src_fname for read, $!"; + } +} + +sub hash_dir { + my $dir = shift; + my %hashlist; + + print "Doing $dir\n"; + + if (!chdir $dir) { + print STDERR "WARNING: Cannot chdir to '$dir', $!\n"; + return; + } + + opendir(DIR, ".") || print STDERR "WARNING: Cannot opendir '.', $!\n"; + my @flist = sort readdir(DIR); + closedir DIR; + if ( $removelinks ) { + # Delete any existing symbolic links + foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) { + if (-l $_) { + print "unlink $_\n" if $verbose; + unlink $_ || warn "Can't unlink $_, $!\n"; + } + } + } + FILE: foreach $fname (grep {/\.(pem|crt|cer|crl)$/} @flist) { + # Check to see if certificates and/or CRLs present. + my ($cert, $crl) = check_file($fname); + if (!$cert && !$crl) { + print STDERR "WARNING: $fname does not contain a certificate or CRL: skipping\n"; + next; + } + link_hash_cert($fname) if ($cert); + link_hash_crl($fname) if ($crl); + } + + chdir $pwd; +} + +sub check_file { + my ($is_cert, $is_crl) = (0,0); + my $fname = $_[0]; + + open(my $in, "<", $fname); + while(<$in>) { + if (/^-----BEGIN (.*)-----/) { + my $hdr = $1; + if ($hdr =~ /^(X509 |TRUSTED |)CERTIFICATE$/) { + $is_cert = 1; + last if ($is_crl); + } elsif ($hdr eq "X509 CRL") { + $is_crl = 1; + last if ($is_cert); + } + } + } + close $in; + return ($is_cert, $is_crl); +} + +sub compute_hash { + my $fh; + if ( $^O eq "VMS" ) { + # VMS uses the open through shell + # The file names are safe there and list form is unsupported + if (!open($fh, "-|", join(' ', @_))) { + print STDERR "Cannot compute hash on '$fname'\n"; + return; + } + } else { + if (!open($fh, "-|", @_)) { + print STDERR "Cannot compute hash on '$fname'\n"; + return; + } + } + return (<$fh>, <$fh>); +} + +# Link a certificate to its subject name hash value, each hash is of +# the form . where n is an integer. If the hash value already exists +# then we need to up the value of n, unless its a duplicate in which +# case we skip the link. 
We check for duplicates by comparing the +# certificate fingerprints + +sub link_hash_cert { + link_hash($_[0], 'cert'); +} + +# Same as above except for a CRL. CRL links are of the form .r + +sub link_hash_crl { + link_hash($_[0], 'crl'); +} + +sub link_hash { + my ($fname, $type) = @_; + my $is_cert = $type eq 'cert'; + + my ($hash, $fprint) = compute_hash($openssl, + $is_cert ? "x509" : "crl", + $is_cert ? $x509hash : $crlhash, + "-fingerprint", "-noout", + "-in", $fname); + chomp $hash; + $hash =~ s/^.*=// if !$is_cert; + chomp $fprint; + return if !$hash; + $fprint =~ s/^.*=//; + $fprint =~ tr/://d; + my $suffix = 0; + # Search for an unused hash filename + my $crlmark = $is_cert ? "" : "r"; + while(exists $hashlist{"$hash.$crlmark$suffix"}) { + # Hash matches: if fingerprint matches its a duplicate cert + if ($hashlist{"$hash.$crlmark$suffix"} eq $fprint) { + my $what = $is_cert ? 'certificate' : 'CRL'; + print STDERR "WARNING: Skipping duplicate $what $fname\n"; + return; + } + $suffix++; + } + $hash .= ".$crlmark$suffix"; + if ($symlink_exists) { + print "link $fname -> $hash\n" if $verbose; + symlink $fname, $hash || warn "Can't symlink, $!"; + } else { + print "copy $fname -> $hash\n" if $verbose; + copy_file($fname, $hash); + } + $hashlist{$hash} = $fprint; +} diff --git a/bin/captoinfo b/bin/captoinfo new file mode 100644 index 0000000000000000000000000000000000000000..4edfa66a0fa5205392cdd31bab1863e9440ef04d Binary files /dev/null and b/bin/captoinfo differ diff --git a/bin/cjpeg b/bin/cjpeg new file mode 100644 index 0000000000000000000000000000000000000000..eb2e3b7d75261b3d052d66632baa3bf0cac01e7c Binary files /dev/null and b/bin/cjpeg differ diff --git a/bin/clear b/bin/clear new file mode 100644 index 0000000000000000000000000000000000000000..703da15c531a76050a3a4778c65cd25f1904ff49 Binary files /dev/null and b/bin/clear differ diff --git a/bin/convert-caffe2-to-onnx b/bin/convert-caffe2-to-onnx new file mode 100644 index 
0000000000000000000000000000000000000000..a2ca596d1cdb77cd0d919ba901cf03c9af22a4f2 --- /dev/null +++ b/bin/convert-caffe2-to-onnx @@ -0,0 +1,33 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python +# EASY-INSTALL-ENTRY-SCRIPT: 'torch==2.5.1','console_scripts','convert-caffe2-to-onnx' +import re +import sys + +# for compatibility with easy_install; see #2198 +__requires__ = 'torch==2.5.1' + +try: + from importlib.metadata import distribution +except ImportError: + try: + from importlib_metadata import distribution + except ImportError: + from pkg_resources import load_entry_point + + +def importlib_load_entry_point(spec, group, name): + dist_name, _, _ = spec.partition('==') + matches = ( + entry_point + for entry_point in distribution(dist_name).entry_points + if entry_point.group == group and entry_point.name == name + ) + return next(matches).load() + + +globals().setdefault('load_entry_point', importlib_load_entry_point) + + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'convert-caffe2-to-onnx')()) diff --git a/bin/convert-onnx-to-caffe2 b/bin/convert-onnx-to-caffe2 new file mode 100644 index 0000000000000000000000000000000000000000..b50912485135240e73654bbb573f17ed87322b2d --- /dev/null +++ b/bin/convert-onnx-to-caffe2 @@ -0,0 +1,33 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python +# EASY-INSTALL-ENTRY-SCRIPT: 'torch==2.5.1','console_scripts','convert-onnx-to-caffe2' +import re +import sys + +# for compatibility with easy_install; see #2198 +__requires__ = 'torch==2.5.1' + +try: + from importlib.metadata import distribution +except ImportError: + try: + from importlib_metadata import distribution + except ImportError: + from pkg_resources import load_entry_point + + +def importlib_load_entry_point(spec, group, name): + dist_name, _, _ = spec.partition('==') + matches = ( + entry_point + for entry_point in 
distribution(dist_name).entry_points + if entry_point.group == group and entry_point.name == name + ) + return next(matches).load() + + +globals().setdefault('load_entry_point', importlib_load_entry_point) + + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'convert-onnx-to-caffe2')()) diff --git a/bin/curve_keygen b/bin/curve_keygen new file mode 100644 index 0000000000000000000000000000000000000000..57f71d98c95f85d092f0e5698f782dc48cb6d027 Binary files /dev/null and b/bin/curve_keygen differ diff --git a/bin/cwebp b/bin/cwebp new file mode 100644 index 0000000000000000000000000000000000000000..c147e8ad2fa9f8c26f5a6a660268720e14794667 Binary files /dev/null and b/bin/cwebp differ diff --git a/bin/dash-generate-components b/bin/dash-generate-components new file mode 100644 index 0000000000000000000000000000000000000000..1d83f5162b9ca26372c9ce944c4de689f949ce27 --- /dev/null +++ b/bin/dash-generate-components @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from dash.development.component_generator import cli +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(cli()) diff --git a/bin/dash-update-components b/bin/dash-update-components new file mode 100644 index 0000000000000000000000000000000000000000..6548d0f74dbdab1a20af6f21f181aee03836b9a0 --- /dev/null +++ b/bin/dash-update-components @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from dash.development.update_components import cli +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(cli()) diff --git a/bin/debugpy b/bin/debugpy new file mode 100644 index 
0000000000000000000000000000000000000000..6907ff8b29a3c56282172f73c3d934199ff759b5 --- /dev/null +++ b/bin/debugpy @@ -0,0 +1,11 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python + +# -*- coding: utf-8 -*- +import re +import sys + +from debugpy.server.cli import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/djpeg b/bin/djpeg new file mode 100644 index 0000000000000000000000000000000000000000..b150781f5d5d8b4f7aaa85830a9068e6e0322a28 Binary files /dev/null and b/bin/djpeg differ diff --git a/bin/dwebp b/bin/dwebp new file mode 100644 index 0000000000000000000000000000000000000000..eccae0b7b1baeaa29314cf5c9d01ce09cbda46dc Binary files /dev/null and b/bin/dwebp differ diff --git a/bin/emnist_get_data.sh b/bin/emnist_get_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..eaabf370a723b4813344c1e82a44e2cc28621da2 --- /dev/null +++ b/bin/emnist_get_data.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +if [ -d emnist_data ]; then + echo "emnist_data directory already present, exiting" + exit 1 +fi + +mkdir emnist_data +pushd emnist_data +#wget http://biometrics.nist.gov/cs_links/EMNIST/gzip.zip +wget http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip +unzip gzip.zip +rm -f gzip.zip +mv gzip/* . 
+rmdir gzip +popd diff --git a/bin/emnist_preview b/bin/emnist_preview new file mode 100644 index 0000000000000000000000000000000000000000..5cb28413e8b7f4594e8a9b54b5ff77d2e68e1d34 --- /dev/null +++ b/bin/emnist_preview @@ -0,0 +1,44 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 + +import random +import argparse +from mnist import MNIST + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("--id", default=None, type=int, + help="ID (position) of the letter to show") + parser.add_argument("--training", action="store_true", + help="Use training set instead of testing set") + parser.add_argument("--dataset", default="digits", + help="EMNIST dataset to load") + parser.add_argument("--data", default="./emnist_data", + help="Path to MNIST data dir") + + args = parser.parse_args() + + mn = MNIST(args.data) + mn.select_emnist(args.dataset) + + if args.training: + img, label = mn.load_training() + else: + img, label = mn.load_testing() + + if args.id: + which = args.id + else: + which = random.randrange(0, len(label)) + + print('Showing id {}, num: {}'.format(which, label[which])) + + # letters dataset uses A=1 B=2 ... 
+ if args.dataset == 'letters': + print('Letter "{}"'.format(chr(label[which] + ord('a') - 1))) + + print(mn.display(img[which])) + wat = img[which] + #import IPython + #IPython.embed() diff --git a/bin/emnist_repack b/bin/emnist_repack new file mode 100644 index 0000000000000000000000000000000000000000..ef3833d54b779a727ae9ccd9429f9113ab52fcfe --- /dev/null +++ b/bin/emnist_repack @@ -0,0 +1,43 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 + +import argparse +import os.path +from mnist import MNIST +from mnist import img_packer + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("--data", default="./emnist_data", + help="Path to MNIST data dir") + parser.add_argument("--output", default=None, + help="Where to save result") + + args = parser.parse_args() + + DATASETS = ["balanced", "byclass", "bymerge", + "digits", "letters", "mnist"] + + mn = MNIST(args.data) + + if not args.output: + dest = args.data + train_img_fname = 'rf_' + mn.train_img_fname + test_img_fname = 'rf_' + mn.test_img_fname + else: + dest = args.output + train_img_fname = mn.train_img_fname + test_img_fname = mn.test_img_fname + + for dt_name in DATASETS: + mn.select_emnist(dt_name) + + print("========procesing {} dataset========".format(dt_name)) + + tra_img, _ = mn.load_training() + img_packer(dest, train_img_fname, + tra_img, gzip=True) + + tes_img, _ = mn.load_testing() + img_packer(dest, test_img_fname, + tes_img, gzip=True) diff --git a/bin/f2py b/bin/f2py new file mode 100644 index 0000000000000000000000000000000000000000..11ba0a36676348b2afbb55e735de76601e8524ee --- /dev/null +++ b/bin/f2py @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/f2py3 b/bin/f2py3 new file 
mode 100644 index 0000000000000000000000000000000000000000..11ba0a36676348b2afbb55e735de76601e8524ee --- /dev/null +++ b/bin/f2py3 @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/f2py3.10 b/bin/f2py3.10 new file mode 100644 index 0000000000000000000000000000000000000000..11ba0a36676348b2afbb55e735de76601e8524ee --- /dev/null +++ b/bin/f2py3.10 @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/fax2ps b/bin/fax2ps new file mode 100644 index 0000000000000000000000000000000000000000..535847114236ec3f6f594b56c69ca16dc496fe81 Binary files /dev/null and b/bin/fax2ps differ diff --git a/bin/fax2tiff b/bin/fax2tiff new file mode 100644 index 0000000000000000000000000000000000000000..974f44d5260c6b473de35b07da2e004f9353a9bd Binary files /dev/null and b/bin/fax2tiff differ diff --git a/bin/flask b/bin/flask new file mode 100644 index 0000000000000000000000000000000000000000..cb8ce8ee7dfc199766fe4c2297a815c1f04a0782 --- /dev/null +++ b/bin/flask @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from flask.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/fonttools b/bin/fonttools new file mode 100644 index 0000000000000000000000000000000000000000..e7e5191e55369087f064067149c9dd67eec49cfe --- /dev/null +++ b/bin/fonttools @@ -0,0 +1,8 @@ 
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from fontTools.__main__ import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/ftfy b/bin/ftfy new file mode 100644 index 0000000000000000000000000000000000000000..553124892508390b9fa8d5bb4be1994f693293cf --- /dev/null +++ b/bin/ftfy @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from ftfy.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/get_objgraph b/bin/get_objgraph new file mode 100644 index 0000000000000000000000000000000000000000..4a4d5f16b504d9eaf9b65911bbd22b6a875d10c5 --- /dev/null +++ b/bin/get_objgraph @@ -0,0 +1,54 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Copyright (c) 2008-2016 California Institute of Technology. +# Copyright (c) 2016-2025 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +""" +display the reference paths for objects in ``dill.types`` or a .pkl file + +Notes: + the generated image is useful in showing the pointer references in + objects that are or can be pickled. Any object in ``dill.objects`` + listed in ``dill.load_types(picklable=True, unpicklable=True)`` works. 
+ +Examples:: + + $ get_objgraph ArrayType + Image generated as ArrayType.png +""" + +import dill as pickle +#pickle.debug.trace(True) +#import pickle + +# get all objects for testing +from dill import load_types +load_types(pickleable=True,unpickleable=True) +from dill import objects + +if __name__ == "__main__": + import sys + if len(sys.argv) != 2: + print ("Please provide exactly one file or type name (e.g. 'IntType')") + msg = "\n" + for objtype in list(objects.keys())[:40]: + msg += objtype + ', ' + print (msg + "...") + else: + objtype = str(sys.argv[-1]) + try: + obj = objects[objtype] + except KeyError: + obj = pickle.load(open(objtype,'rb')) + import os + objtype = os.path.splitext(objtype)[0] + try: + import objgraph + objgraph.show_refs(obj, filename=objtype+'.png') + except ImportError: + print ("Please install 'objgraph' to view object graphs") + + +# EOF diff --git a/bin/imageio_download_bin b/bin/imageio_download_bin new file mode 100644 index 0000000000000000000000000000000000000000..66a82b5f24b5d0b115aeb0abbdedd607330bf349 --- /dev/null +++ b/bin/imageio_download_bin @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from imageio.__main__ import download_bin_main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(download_bin_main()) diff --git a/bin/libdeflate-gunzip b/bin/libdeflate-gunzip new file mode 100644 index 0000000000000000000000000000000000000000..14c6c1f6e3e40364ecd3410d344de412fb3eeefe Binary files /dev/null and b/bin/libdeflate-gunzip differ diff --git a/bin/opj_dump b/bin/opj_dump new file mode 100644 index 0000000000000000000000000000000000000000..4a4c6bbc4a1385a3f19f5d2f1b5a88c071b3a1d0 Binary files /dev/null and b/bin/opj_dump differ diff --git a/bin/pal2rgb b/bin/pal2rgb new file mode 100644 index 0000000000000000000000000000000000000000..e788493b2f136bfd16ddd15811e3bdf5679c0899 Binary 
files /dev/null and b/bin/pal2rgb differ diff --git a/bin/pip b/bin/pip new file mode 100644 index 0000000000000000000000000000000000000000..c3379a8167e48fa40842d350255804dbd9d089f3 --- /dev/null +++ b/bin/pip @@ -0,0 +1,10 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys + +from pip._internal.cli.main import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/pip3 b/bin/pip3 new file mode 100644 index 0000000000000000000000000000000000000000..c3379a8167e48fa40842d350255804dbd9d089f3 --- /dev/null +++ b/bin/pip3 @@ -0,0 +1,10 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys + +from pip._internal.cli.main import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/plotly_get_chrome b/bin/plotly_get_chrome new file mode 100644 index 0000000000000000000000000000000000000000..247f76e3764850400f57bf5bf38b5d227c6b71c7 --- /dev/null +++ b/bin/plotly_get_chrome @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from plotly.io._kaleido import get_chrome +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(get_chrome()) diff --git a/bin/png-fix-itxt b/bin/png-fix-itxt new file mode 100644 index 0000000000000000000000000000000000000000..41ad09ec6583fef6cfb9e64ef8cc291234ed7818 Binary files /dev/null and b/bin/png-fix-itxt differ diff --git a/bin/pngfix b/bin/pngfix new file mode 100644 index 0000000000000000000000000000000000000000..91ada91ff5b87b08893b5612f808c26b9f96f402 Binary files /dev/null and b/bin/pngfix differ diff --git a/bin/ppm2tiff b/bin/ppm2tiff new file mode 100644 index 
0000000000000000000000000000000000000000..7cba37b65f07f9994c3a1b54c66f20bfd87f5739 Binary files /dev/null and b/bin/ppm2tiff differ diff --git a/bin/psicc b/bin/psicc new file mode 100644 index 0000000000000000000000000000000000000000..5614a66852d836e81d4efa1c7b78c5229d665bd3 Binary files /dev/null and b/bin/psicc differ diff --git a/bin/pyav b/bin/pyav new file mode 100644 index 0000000000000000000000000000000000000000..e67e34a36b7880d376f80b0bcd6883c4aa90acd4 --- /dev/null +++ b/bin/pyav @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from av.__main__ import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/pydoc b/bin/pydoc new file mode 100644 index 0000000000000000000000000000000000000000..5d5fcc0ff427b00e5ff679652fc2385f171af093 --- /dev/null +++ b/bin/pydoc @@ -0,0 +1,5 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 + +import pydoc +if __name__ == '__main__': + pydoc.cli() diff --git a/bin/pydoc3 b/bin/pydoc3 new file mode 100644 index 0000000000000000000000000000000000000000..5d5fcc0ff427b00e5ff679652fc2385f171af093 --- /dev/null +++ b/bin/pydoc3 @@ -0,0 +1,5 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 + +import pydoc +if __name__ == '__main__': + pydoc.cli() diff --git a/bin/pydoc3.10 b/bin/pydoc3.10 new file mode 100644 index 0000000000000000000000000000000000000000..5d5fcc0ff427b00e5ff679652fc2385f171af093 --- /dev/null +++ b/bin/pydoc3.10 @@ -0,0 +1,5 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 + +import pydoc +if __name__ == '__main__': + pydoc.cli() diff --git a/bin/pyftmerge b/bin/pyftmerge new file mode 100644 index 0000000000000000000000000000000000000000..3713ce7cd8c73cf8bc59a8e6ca555a48b8322d52 --- /dev/null +++ b/bin/pyftmerge @@ -0,0 +1,8 @@ 
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from fontTools.merge import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/pyftsubset b/bin/pyftsubset new file mode 100644 index 0000000000000000000000000000000000000000..07469fa87a6ff8add5d2d8d4a19d588efea083b8 --- /dev/null +++ b/bin/pyftsubset @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from fontTools.subset import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/pygmentize b/bin/pygmentize new file mode 100644 index 0000000000000000000000000000000000000000..3449153829a4265e6a3b5376bcf516a037081749 --- /dev/null +++ b/bin/pygmentize @@ -0,0 +1,10 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys + +from pygments.cmdline import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/pyrsa-decrypt b/bin/pyrsa-decrypt new file mode 100644 index 0000000000000000000000000000000000000000..a38cf86fd16033f9a585b2f47f49f36b1e6bf515 --- /dev/null +++ b/bin/pyrsa-decrypt @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from rsa.cli import decrypt +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(decrypt()) diff --git a/bin/pyrsa-encrypt b/bin/pyrsa-encrypt new file mode 100644 index 0000000000000000000000000000000000000000..dec43966d817a65514629ff3d70baf4a986d5148 --- /dev/null +++ b/bin/pyrsa-encrypt @@ -0,0 +1,8 @@ 
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from rsa.cli import encrypt +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(encrypt()) diff --git a/bin/pyrsa-keygen b/bin/pyrsa-keygen new file mode 100644 index 0000000000000000000000000000000000000000..fcd9fb1033aecec65ef8b1bc6d6473e5042a330d --- /dev/null +++ b/bin/pyrsa-keygen @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from rsa.cli import keygen +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(keygen()) diff --git a/bin/pyrsa-priv2pub b/bin/pyrsa-priv2pub new file mode 100644 index 0000000000000000000000000000000000000000..9e3729fcde3369ade8f4526edeb93e82c1366b37 --- /dev/null +++ b/bin/pyrsa-priv2pub @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from rsa.util import private_to_public +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(private_to_public()) diff --git a/bin/pyrsa-sign b/bin/pyrsa-sign new file mode 100644 index 0000000000000000000000000000000000000000..2c0c50d69e2f469da8240863931ca87f65316149 --- /dev/null +++ b/bin/pyrsa-sign @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from rsa.cli import sign +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(sign()) diff --git a/bin/pyrsa-verify b/bin/pyrsa-verify new file mode 100644 index 0000000000000000000000000000000000000000..9057367950613706dfd42f4a93a6f770d98dd541 --- /dev/null +++ b/bin/pyrsa-verify @@ -0,0 +1,8 @@ 
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10
+# -*- coding: utf-8 -*-
+import re
+import sys
+from rsa.cli import verify
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(verify())
diff --git a/bin/python3-config b/bin/python3-config
new file mode 100644
index 0000000000000000000000000000000000000000..32c498bc3fd14b79095032722f845ae47e7978f6
--- /dev/null
+++ b/bin/python3-config
@@ -0,0 +1,116 @@
+#!/bin/sh
+
+# Keep this script in sync with python-config.in
+
+exit_with_usage ()
+{
+    echo "Usage: $0 --prefix|--exec-prefix|--includes|--libs|--cflags|--ldflags|--extension-suffix|--help|--abiflags|--configdir|--embed"
+    exit "$1"
+}
+
+if [ "$1" = "" ] ; then
+    exit_with_usage 1
+fi
+
+# Prints the parent of the directory holding script path $1, symlink-resolved when readlink -f works; every expansion is quoted so a prefix containing whitespace is not word-split.
+installed_prefix ()
+{
+    RESULT=$(dirname "$(cd "$(dirname "$1")" && pwd -P)")
+    if command -v readlink >/dev/null 2>&1 ; then
+        if readlink -f "$RESULT" >/dev/null 2>&1; then
+          RESULT=$(readlink -f "$RESULT")
+        fi
+    fi
+    echo "$RESULT"
+}
+
+prefix_real=$(installed_prefix "$0")
+
+# Use sed to fix paths from their built-to locations to their installed-to
+# locations. Keep prefix & exec_prefix using their original values in case
+# they are referenced in other configure variables, to prevent double
+# substitution, issue #22140.
+prefix="/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe" +exec_prefix="${prefix}" +exec_prefix_real=${prefix_real} +includedir=$(echo "${prefix}/include" | sed "s#$prefix#$prefix_real#") +libdir=$(echo "${exec_prefix}/lib" | sed "s#$prefix#$prefix_real#") +CFLAGS=$(echo "-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O3 -ffunction-sections -pipe -isystem /home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/include -fdebug-prefix-map=/croot/python-split_1749128751284/work=/usr/local/src/conda/python-3.10.18 -fdebug-prefix-map=/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe=/usr/local/src/conda-prefix -fuse-linker-plugin -ffat-lto-objects -flto-partition=none -flto" | sed "s#$prefix#$prefix_real#") +VERSION="3.10" +LIBM="-lm" +LIBC="" +SYSLIBS="$LIBM $LIBC" +ABIFLAGS="" +LIBS=" -lcrypt -lpthread -ldl -lutil -lm $SYSLIBS" +LIBS_EMBED="-lpython${VERSION}${ABIFLAGS} -lcrypt -lpthread -ldl -lutil -lm $SYSLIBS" +BASECFLAGS=" -Wno-unused-result -Wsign-compare" +LDLIBRARY="libpython${VERSION}${ABIFLAGS}.a" +OPT="-DNDEBUG -fwrapv -O3 -Wall" +PY_ENABLE_SHARED="0" +LDVERSION="${VERSION}${ABIFLAGS}" +LIBDEST=${prefix_real}/lib/python${VERSION} +LIBPL=$(echo "${prefix}/lib/python3.10/config-${VERSION}${ABIFLAGS}-x86_64-linux-gnu" | sed "s#$prefix#$prefix_real#") +SO=".cpython-310-x86_64-linux-gnu.so" +PYTHONFRAMEWORK="" +INCDIR="-I$includedir/python${VERSION}${ABIFLAGS}" +PLATINCDIR="-I$includedir/python${VERSION}${ABIFLAGS}" +PY_EMBED=0 + +# Scan for --help or unknown argument. 
+for ARG in "$@"  # quoted so validation sees exactly the words the dispatch loop below sees
+do
+    case "$ARG" in
+        --help)
+            exit_with_usage 0
+        ;;
+        --embed)
+            PY_EMBED=1
+        ;;
+        --prefix|--exec-prefix|--includes|--libs|--cflags|--ldflags|--extension-suffix|--abiflags|--configdir)
+        ;;
+        *)
+            exit_with_usage 1
+        ;;
+    esac
+done
+
+if [ "$PY_EMBED" = 1 ] ; then
+    LIBS="$LIBS_EMBED"
+fi
+
+for ARG in "$@"
+do
+    case "$ARG" in
+        --prefix)
+            echo "$prefix_real"
+        ;;
+        --exec-prefix)
+            echo "$exec_prefix_real"
+        ;;
+        --includes)
+            echo "$INCDIR $PLATINCDIR"
+        ;;
+        --cflags)
+            echo "$INCDIR $PLATINCDIR $BASECFLAGS $CFLAGS $OPT"
+        ;;
+        --libs)
+            echo "$LIBS"
+        ;;
+        --ldflags)
+            LIBPLUSED=
+            if [ "$PY_ENABLE_SHARED" = "0" ] ; then
+                LIBPLUSED="-L$LIBPL"
+            fi
+            echo "$LIBPLUSED -L$libdir $LIBS"
+        ;;
+        --extension-suffix)
+            echo "$SO"
+        ;;
+        --abiflags)
+            echo "$ABIFLAGS"
+        ;;
+        --configdir)
+            echo "$LIBPL"
+        ;;
+esac
+done
diff --git a/bin/python3.10-config b/bin/python3.10-config
new file mode 100644
index 0000000000000000000000000000000000000000..32c498bc3fd14b79095032722f845ae47e7978f6
--- /dev/null
+++ b/bin/python3.10-config
@@ -0,0 +1,116 @@
+#!/bin/sh
+
+# Keep this script in sync with python-config.in
+
+exit_with_usage ()
+{
+    echo "Usage: $0 --prefix|--exec-prefix|--includes|--libs|--cflags|--ldflags|--extension-suffix|--help|--abiflags|--configdir|--embed"
+    exit "$1"
+}
+
+if [ "$1" = "" ] ; then
+    exit_with_usage 1
+fi
+
+# Prints the parent of the directory holding script path $1, symlink-resolved when readlink -f works; every expansion is quoted so a prefix containing whitespace is not word-split.
+installed_prefix ()
+{
+    RESULT=$(dirname "$(cd "$(dirname "$1")" && pwd -P)")
+    if command -v readlink >/dev/null 2>&1 ; then
+        if readlink -f "$RESULT" >/dev/null 2>&1; then
+          RESULT=$(readlink -f "$RESULT")
+        fi
+    fi
+    echo "$RESULT"
+}
+
+prefix_real=$(installed_prefix "$0")
+
+# Use sed to fix paths from their built-to locations to their installed-to
+# locations. Keep prefix & exec_prefix using their original values in case
+# they are referenced in other configure variables, to prevent double
+# substitution, issue #22140.
+prefix="/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe" +exec_prefix="${prefix}" +exec_prefix_real=${prefix_real} +includedir=$(echo "${prefix}/include" | sed "s#$prefix#$prefix_real#") +libdir=$(echo "${exec_prefix}/lib" | sed "s#$prefix#$prefix_real#") +CFLAGS=$(echo "-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O3 -ffunction-sections -pipe -isystem /home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/include -fdebug-prefix-map=/croot/python-split_1749128751284/work=/usr/local/src/conda/python-3.10.18 -fdebug-prefix-map=/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe=/usr/local/src/conda-prefix -fuse-linker-plugin -ffat-lto-objects -flto-partition=none -flto" | sed "s#$prefix#$prefix_real#") +VERSION="3.10" +LIBM="-lm" +LIBC="" +SYSLIBS="$LIBM $LIBC" +ABIFLAGS="" +LIBS=" -lcrypt -lpthread -ldl -lutil -lm $SYSLIBS" +LIBS_EMBED="-lpython${VERSION}${ABIFLAGS} -lcrypt -lpthread -ldl -lutil -lm $SYSLIBS" +BASECFLAGS=" -Wno-unused-result -Wsign-compare" +LDLIBRARY="libpython${VERSION}${ABIFLAGS}.a" +OPT="-DNDEBUG -fwrapv -O3 -Wall" +PY_ENABLE_SHARED="0" +LDVERSION="${VERSION}${ABIFLAGS}" +LIBDEST=${prefix_real}/lib/python${VERSION} +LIBPL=$(echo "${prefix}/lib/python3.10/config-${VERSION}${ABIFLAGS}-x86_64-linux-gnu" | sed "s#$prefix#$prefix_real#") +SO=".cpython-310-x86_64-linux-gnu.so" +PYTHONFRAMEWORK="" +INCDIR="-I$includedir/python${VERSION}${ABIFLAGS}" +PLATINCDIR="-I$includedir/python${VERSION}${ABIFLAGS}" +PY_EMBED=0 + +# Scan for --help or unknown argument. 
+for ARG in "$@"  # quoted so validation sees exactly the words the dispatch loop below sees
+do
+    case "$ARG" in
+        --help)
+            exit_with_usage 0
+        ;;
+        --embed)
+            PY_EMBED=1
+        ;;
+        --prefix|--exec-prefix|--includes|--libs|--cflags|--ldflags|--extension-suffix|--abiflags|--configdir)
+        ;;
+        *)
+            exit_with_usage 1
+        ;;
+    esac
+done
+
+if [ "$PY_EMBED" = 1 ] ; then
+    LIBS="$LIBS_EMBED"
+fi
+
+for ARG in "$@"
+do
+    case "$ARG" in
+        --prefix)
+            echo "$prefix_real"
+        ;;
+        --exec-prefix)
+            echo "$exec_prefix_real"
+        ;;
+        --includes)
+            echo "$INCDIR $PLATINCDIR"
+        ;;
+        --cflags)
+            echo "$INCDIR $PLATINCDIR $BASECFLAGS $CFLAGS $OPT"
+        ;;
+        --libs)
+            echo "$LIBS"
+        ;;
+        --ldflags)
+            LIBPLUSED=
+            if [ "$PY_ENABLE_SHARED" = "0" ] ; then
+                LIBPLUSED="-L$LIBPL"
+            fi
+            echo "$LIBPLUSED -L$libdir $LIBS"
+        ;;
+        --extension-suffix)
+            echo "$SO"
+        ;;
+        --abiflags)
+            echo "$ABIFLAGS"
+        ;;
+        --configdir)
+            echo "$LIBPL"
+        ;;
+esac
+done
diff --git a/bin/pytorch3d_implicitron_runner b/bin/pytorch3d_implicitron_runner
new file mode 100644
index 0000000000000000000000000000000000000000..540a831edf0b22e99800f2e59fe72977cddd0741
--- /dev/null
+++ b/bin/pytorch3d_implicitron_runner
@@ -0,0 +1,8 @@
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pytorch3d.implicitron_trainer.experiment import experiment
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(experiment())
diff --git a/bin/pytorch3d_implicitron_visualizer b/bin/pytorch3d_implicitron_visualizer
new file mode 100644
index 0000000000000000000000000000000000000000..0ea39e36529db4dfe1ac4397eb5811ecba5a570d
--- /dev/null
+++ b/bin/pytorch3d_implicitron_visualizer
@@ -0,0 +1,8 @@
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pytorch3d.implicitron_trainer.visualize_reconstruction import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/bin/raw2tiff b/bin/raw2tiff new file mode 100644 index 0000000000000000000000000000000000000000..93d9967c8dfa50401caa518732aad8c81e8b296b Binary files /dev/null and b/bin/raw2tiff differ diff --git a/bin/rdjpgcom b/bin/rdjpgcom new file mode 100644 index 0000000000000000000000000000000000000000..86278bc741eb7e3d02251eed43ee88034ef22535 Binary files /dev/null and b/bin/rdjpgcom differ diff --git a/bin/renderer b/bin/renderer new file mode 100644 index 0000000000000000000000000000000000000000..000dbb4f0e23d1dac708ddb57fae721a77f65d16 --- /dev/null +++ b/bin/renderer @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from dash.development.build_process import renderer +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(renderer()) diff --git a/bin/reset b/bin/reset new file mode 100644 index 0000000000000000000000000000000000000000..7bd1d42789af918f9142590b2bdd5003cbbbedc6 Binary files /dev/null and b/bin/reset differ diff --git a/bin/sqlite3_analyzer b/bin/sqlite3_analyzer new file mode 100644 index 0000000000000000000000000000000000000000..bdb8c967181ba45160c3417c625b32c1270daedf --- /dev/null +++ b/bin/sqlite3_analyzer @@ -0,0 +1,899 @@ +#! /bin/sh +# restart with tclsh \ +exec tclsh "$0" ${1+"$@"} +package require sqlite3 + +# Run this TCL script using an SQLite-enabled TCL interpreter to get a report +# on how much disk space is used by a particular data to actually store data +# versus how much space is unused. +# +# The dbstat virtual table is required. +# + +if {[catch { + +# Argument $tname is the name of a table within the database opened by +# database handle [db]. Return true if it is a WITHOUT ROWID table, or +# false otherwise. 
+# +proc is_without_rowid {tname} { + set t [string map {' ''} $tname] + db eval "PRAGMA index_list = '$t'" o { + if {$o(origin) == "pk"} { + set n $o(name) + if {0==[db one { SELECT count(*) FROM sqlite_schema WHERE name=$n }]} { + return 1 + } + } + } + return 0 +} + +# Read and run TCL commands from standard input. Used to implement +# the --tclsh option. +# +proc tclsh {} { + set line {} + while {![eof stdin]} { + if {$line!=""} { + puts -nonewline "> " + } else { + puts -nonewline "% " + } + flush stdout + append line [gets stdin] + if {[info complete $line]} { + if {[catch {uplevel #0 $line} result]} { + puts stderr "Error: $result" + } elseif {$result!=""} { + puts $result + } + set line {} + } else { + append line \n + } + } +} + + +# Get the name of the database to analyze +# +proc usage {} { + set argv0 [file rootname [file tail [info script]]] + puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename" + puts stderr { +Analyze the SQLite3 database file specified by the "database-filename" +argument and output a report detailing size and storage efficiency +information for the database and its constituent tables and indexes. 
+ +Options: + + --pageinfo Show how each page of the database-file is used + + --stats Output SQL text that creates a new database containing + statistics about the database that was analyzed + + --tclsh Run the built-in TCL interpreter interactively (for debugging) + + --version Show the version number of SQLite +} + exit 1 +} +set file_to_analyze {} +set flags(-pageinfo) 0 +set flags(-stats) 0 +set flags(-debug) 0 +append argv {} +foreach arg $argv { + if {[regexp {^-+pageinfo$} $arg]} { + set flags(-pageinfo) 1 + } elseif {[regexp {^-+stats$} $arg]} { + set flags(-stats) 1 + } elseif {[regexp {^-+debug$} $arg]} { + set flags(-debug) 1 + } elseif {[regexp {^-+tclsh$} $arg]} { + tclsh + exit 0 + } elseif {[regexp {^-+version$} $arg]} { + sqlite3 mem :memory: + puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}] + mem close + exit 0 + } elseif {[regexp {^-} $arg]} { + puts stderr "Unknown option: $arg" + usage + } elseif {$file_to_analyze!=""} { + usage + } else { + set file_to_analyze $arg + } +} +if {$file_to_analyze==""} usage +set root_filename $file_to_analyze +regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename +if {![file exists $root_filename]} { + puts stderr "No such file: $root_filename" + exit 1 +} +if {![file readable $root_filename]} { + puts stderr "File is not readable: $root_filename" + exit 1 +} +set true_file_size [file size $root_filename] +if {$true_file_size<512} { + puts stderr "Empty or malformed database: $root_filename" + exit 1 +} + +# Compute the total file size assuming test_multiplexor is being used. 
+# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled +# +set extension [file extension $root_filename] +set pattern $root_filename +append pattern {[0-3][0-9][0-9]} +foreach f [glob -nocomplain $pattern] { + incr true_file_size [file size $f] + set extension {} +} +if {[string length $extension]>=2 && [string length $extension]<=4} { + set pattern [file rootname $root_filename] + append pattern {.[0-3][0-9][0-9]} + foreach f [glob -nocomplain $pattern] { + incr true_file_size [file size $f] + } +} + +# Open the database +# +if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { + puts stderr "error trying to open $file_to_analyze: $msg" + exit 1 +} +if {$flags(-debug)} { + proc dbtrace {txt} {puts $txt; flush stdout;} + db trace ::dbtrace +} + +# Make sure all required compile-time options are available +# +if {![db exists {SELECT 1 FROM pragma_compile_options + WHERE compile_options='ENABLE_DBSTAT_VTAB'}]} { + puts "The SQLite database engine linked with this application\ + lacks required capabilities. Recompile using the\ + -DSQLITE_ENABLE_DBSTAT_VTAB compile-time option to fix\ + this problem." 
+ exit 1 +} + +db eval {SELECT count(*) FROM sqlite_schema} +set pageSize [expr {wide([db one {PRAGMA page_size}])}] + +if {$flags(-pageinfo)} { + db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} + db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { + puts "$pageno $name $path" + } + exit 0 +} +if {$flags(-stats)} { + db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} + puts "BEGIN;" + puts "CREATE TABLE stats(" + puts " name STRING, /* Name of table or index */" + puts " path INTEGER, /* Path to page from root */" + puts " pageno INTEGER, /* Page number */" + puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */" + puts " ncell INTEGER, /* Cells on page (0 for overflow) */" + puts " payload INTEGER, /* Bytes of payload on this page */" + puts " unused INTEGER, /* Bytes of unused space on this page */" + puts " mx_payload INTEGER, /* Largest payload size of all cells */" + puts " pgoffset INTEGER, /* Offset of page in file */" + puts " pgsize INTEGER /* Size of the page */" + puts ");" + db eval {SELECT quote(name) || ',' || + quote(path) || ',' || + quote(pageno) || ',' || + quote(pagetype) || ',' || + quote(ncell) || ',' || + quote(payload) || ',' || + quote(unused) || ',' || + quote(mx_payload) || ',' || + quote(pgoffset) || ',' || + quote(pgsize) AS x FROM stat} { + puts "INSERT INTO stats VALUES($x);" + } + puts "COMMIT;" + exit 0 +} + + +# In-memory database for collecting statistics. This script loops through +# the tables and indices in the database being analyzed, adding a row for each +# to an in-memory database (for which the schema is shown below). It then +# queries the in-memory db to produce the space-analysis report. 
+# +sqlite3 mem :memory: +if {$flags(-debug)} { + proc dbtrace {txt} {puts $txt; flush stdout;} + mem trace ::dbtrace +} +set tabledef {CREATE TABLE space_used( + name clob, -- Name of a table or index in the database file + tblname clob, -- Name of associated table + is_index boolean, -- TRUE if it is an index, false for a table + is_without_rowid boolean, -- TRUE if WITHOUT ROWID table + nentry int, -- Number of entries in the BTree + leaf_entries int, -- Number of leaf entries + depth int, -- Depth of the b-tree + payload int, -- Total amount of data stored in this table or index + ovfl_payload int, -- Total amount of data stored on overflow pages + ovfl_cnt int, -- Number of entries that use overflow + mx_payload int, -- Maximum payload size + int_pages int, -- Number of interior pages used + leaf_pages int, -- Number of leaf pages used + ovfl_pages int, -- Number of overflow pages used + int_unused int, -- Number of unused bytes on interior pages + leaf_unused int, -- Number of unused bytes on primary pages + ovfl_unused int, -- Number of unused bytes on overflow pages + gap_cnt int, -- Number of gaps in the page layout + compressed_size int -- Total bytes stored on disk +);} +mem eval $tabledef + +# Create a temporary "dbstat" virtual table. 
+# +db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} +db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat + ORDER BY name, path} +db eval {DROP TABLE temp.stat} + +set isCompressed 0 +set compressOverhead 0 +set depth 0 +set sql { SELECT name, tbl_name FROM sqlite_schema WHERE rootpage>0 } +foreach {name tblname} [concat sqlite_schema sqlite_schema [db eval $sql]] { + + set is_index [expr {$name!=$tblname}] + set is_without_rowid [is_without_rowid $name] + db eval { + SELECT + sum(ncell) AS nentry, + sum((pagetype=='leaf')*ncell) AS leaf_entries, + sum(payload) AS payload, + sum((pagetype=='overflow') * payload) AS ovfl_payload, + sum(path LIKE '%+000000') AS ovfl_cnt, + max(mx_payload) AS mx_payload, + sum(pagetype=='internal') AS int_pages, + sum(pagetype=='leaf') AS leaf_pages, + sum(pagetype=='overflow') AS ovfl_pages, + sum((pagetype=='internal') * unused) AS int_unused, + sum((pagetype=='leaf') * unused) AS leaf_unused, + sum((pagetype=='overflow') * unused) AS ovfl_unused, + sum(pgsize) AS compressed_size, + max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4) + AS depth + FROM temp.dbstat WHERE name = $name + } break + + set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] + set storage [expr {$total_pages*$pageSize}] + if {!$isCompressed && $storage>$compressed_size} { + set isCompressed 1 + set compressOverhead 14 + } + + # Column 'gap_cnt' is set to the number of non-contiguous entries in the + # list of pages visited if the b-tree structure is traversed in a top-down + # fashion (each node visited before its child-tree is passed). Any overflow + # chains present are traversed from start to finish before any child-tree + # is. 
+  #
+  set gap_cnt 0
+  set prev 0
+  db eval {
+    SELECT pageno, pagetype FROM temp.dbstat
+     WHERE name=$name
+     ORDER BY pageno
+  } {
+    if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
+      incr gap_cnt
+    }
+    set prev $pageno
+  }
+  mem eval {
+    INSERT INTO space_used VALUES(
+      $name,
+      $tblname,
+      $is_index,
+      $is_without_rowid,
+      $nentry,
+      $leaf_entries,
+      $depth,
+      $payload,
+      $ovfl_payload,
+      $ovfl_cnt,
+      $mx_payload,
+      $int_pages,
+      $leaf_pages,
+      $ovfl_pages,
+      $int_unused,
+      $leaf_unused,
+      $ovfl_unused,
+      $gap_cnt,
+      $compressed_size
+    );
+  }
+}
+
+proc integerify {real} {
+  if {[string is double -strict $real]} {
+    return [expr {wide($real)}]
+  } else {
+    return 0
+  }
+}
+mem function int integerify
+
+# Escape a string for use inside an SQL string literal by doubling each single quote; the enclosing quotes are NOT added. Examples:
+#
+#   [quote {hello world}]   == {hello world}
+#   [quote {hello world's}] == {hello world''s}
+#
+proc quote {txt} {
+  return [string map {' ''} $txt]
+}
+
+# Output a title line: a row of 79 stars when $title is empty, otherwise "*** $title" followed by stars.
+#
+proc titleline {title} {
+  if {$title==""} {
+    puts [string repeat * 79]
+  } else {
+    set len [string length $title]
+    set stars [string repeat * [expr {79-$len-5}]]
+    puts "*** $title $stars"
+  }
+}
+
+# Generate a single line of output in the statistics section of the
+# report.
+#
+proc statline {title value {extra {}}} {
+  set len [string length $title]
+  set dots [string repeat . [expr {50-$len}]]
+  set len [string length $value]
+  set sp2 [string range { } $len end]
+  if {$extra ne ""} {
+    set extra " $extra"
+  }
+  puts "$title$dots $value$sp2$extra"
+}
+
+# Generate a formatted percentage value for $num/$denom. Returns "" when $denom is zero.
+# NOTE(review): $of is reset to {} before formatting, so the optional suffix callers pass is never printed -- confirm this is intentional.
+proc percent {num denom {of {}}} {
+  if {$denom==0.0} {return ""}
+  set v [expr {$num*100.0/$denom}]
+  set of {}
+  if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
+    return [format {%5.1f%% %s} $v $of]
+  } elseif {$v<0.1 || $v>99.9} {
+    return [format {%7.3f%% %s} $v $of]
+  } else {
+    return [format {%6.2f%% %s} $v $of]
+  }
+}
+
+proc divide {num denom} {
+  if {$denom==0} {return 0.0}
+  return [format %.2f [expr {double($num)/double($denom)}]]
+}
+
+# Generate a subreport that covers some subset of the database.
+# The $where clause determines which subset to analyze.
+#
+proc subreport {title where showFrag} {
+  global pageSize file_pgcnt compressOverhead
+
+  # Query the in-memory database for the sum of various statistics
+  # for the subset of tables/indices identified by the WHERE clause in
+  # $where. Note that even if the WHERE clause matches no rows, the
+  # following query returns exactly one row (because it is an aggregate).
+  #
+  # The results of the query are stored directly by SQLite into local
+  # variables (i.e. $nentry, $payload etc.).
+ # + mem eval " + SELECT + int(sum( + CASE WHEN (is_without_rowid OR is_index) THEN nentry + ELSE leaf_entries + END + )) AS nentry, + int(sum(payload)) AS payload, + int(sum(ovfl_payload)) AS ovfl_payload, + max(mx_payload) AS mx_payload, + int(sum(ovfl_cnt)) as ovfl_cnt, + int(sum(leaf_pages)) AS leaf_pages, + int(sum(int_pages)) AS int_pages, + int(sum(ovfl_pages)) AS ovfl_pages, + int(sum(leaf_unused)) AS leaf_unused, + int(sum(int_unused)) AS int_unused, + int(sum(ovfl_unused)) AS ovfl_unused, + int(sum(gap_cnt)) AS gap_cnt, + int(sum(compressed_size)) AS compressed_size, + int(max(depth)) AS depth, + count(*) AS cnt + FROM space_used WHERE $where" {} {} + + # Output the sub-report title, nicely decorated with * characters. + # + puts "" + titleline $title + puts "" + + # Calculate statistics and store the results in TCL variables, as follows: + # + # total_pages: Database pages consumed. + # total_pages_percent: Pages consumed as a percentage of the file. + # storage: Bytes consumed. + # payload_percent: Payload bytes used as a percentage of $storage. + # total_unused: Unused bytes on pages. + # avg_payload: Average payload per btree entry. + # avg_fanout: Average fanout for internal pages. + # avg_unused: Average unused bytes per btree entry. + # avg_meta: Average metadata overhead per entry. + # ovfl_cnt_percent: Percentage of btree entries that use overflow pages. 
+ # + set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] + set total_pages_percent [percent $total_pages $file_pgcnt] + set storage [expr {$total_pages*$pageSize}] + set payload_percent [percent $payload $storage {of storage consumed}] + set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}] + set avg_payload [divide $payload $nentry] + set avg_unused [divide $total_unused $nentry] + set total_meta [expr {$storage - $payload - $total_unused}] + set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}] + set meta_percent [percent $total_meta $storage {of metadata}] + set avg_meta [divide $total_meta $nentry] + if {$int_pages>0} { + # TODO: Is this formula correct? + set nTab [mem eval " + SELECT count(*) FROM ( + SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 + ) + "] + set avg_fanout [mem eval " + SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used + WHERE $where + "] + set avg_fanout [format %.2f $avg_fanout] + } + set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}] + + # Print out the sub-report statistics. 
+ # + statline {Percentage of total database} $total_pages_percent + statline {Number of entries} $nentry + statline {Bytes of storage consumed} $storage + if {$compressed_size!=$storage} { + set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] + set pct [expr {$compressed_size*100.0/$storage}] + set pct [format {%5.1f%%} $pct] + statline {Bytes used after compression} $compressed_size $pct + } + statline {Bytes of payload} $payload $payload_percent + statline {Bytes of metadata} $total_meta $meta_percent + if {$cnt==1} {statline {B-tree depth} $depth} + statline {Average payload per entry} $avg_payload + statline {Average unused bytes per entry} $avg_unused + statline {Average metadata per entry} $avg_meta + if {[info exists avg_fanout]} { + statline {Average fanout} $avg_fanout + } + if {$showFrag && $total_pages>1} { + set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] + statline {Non-sequential pages} $gap_cnt $fragmentation + } + statline {Maximum payload per entry} $mx_payload + statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent + if {$int_pages>0} { + statline {Index pages used} $int_pages + } + statline {Primary pages used} $leaf_pages + statline {Overflow pages used} $ovfl_pages + statline {Total pages used} $total_pages + if {$int_unused>0} { + set int_unused_percent [ + percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] + statline "Unused bytes on index pages" $int_unused $int_unused_percent + } + statline "Unused bytes on primary pages" $leaf_unused [ + percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] + statline "Unused bytes on overflow pages" $ovfl_unused [ + percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] + statline "Unused bytes on all pages" $total_unused [ + percent $total_unused $storage {of all space}] + return 1 +} + +# Calculate the overhead in pages caused by auto-vacuum. 
+#
+# This procedure calculates and returns the number of pages used by the
+# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
+# then 0 is returned. The two arguments are the size of the database file in
+# pages and the page size used by the database (in bytes).
+proc autovacuum_overhead {filePages pageSize} {
+
+  # Set $autovacuum to non-zero for databases that support auto-vacuum.
+  set autovacuum [db one {PRAGMA auto_vacuum}]
+
+  # If the database is not an auto-vacuum database or the file consists
+  # of one page only then there is no overhead for auto-vacuum. Return zero.
+  if {0==$autovacuum || $filePages==1} {
+    return 0
+  }
+
+  # The number of entries on each pointer map page. The layout of the
+  # database file is one pointer-map page, followed by $ptrsPerPage other
+  # pages, followed by a pointer-map page etc. The first pointer-map page
+  # is the second page of the file overall.
+  set ptrsPerPage [expr {double($pageSize/5)}]
+
+  # Return the number of pointer map pages in the database.
+  return [expr {wide(ceil(($filePages-1.0)/($ptrsPerPage+1.0)))}]
+}
+
+
+# Calculate the summary statistics for the database and store the results
+# in TCL variables. They are output below. Variables are as follows:
+#
+# pageSize:      Size of each page in bytes.
+# file_bytes:    File size in bytes.
+# file_pgcnt:    Number of pages in the file.
+# file_pgcnt2:   Number of pages in the file (calculated).
+# av_pgcnt:      Pages consumed by the auto-vacuum pointer-map.
+# av_percent:    Percentage of the file consumed by auto-vacuum pointer-map.
+# inuse_pgcnt:   Data pages in the file.
+# inuse_percent: Percentage of pages used to store data.
+# free_pgcnt:    Free pages calculated as (file_pgcnt - inuse_pgcnt - av_pgcnt)
+# free_pgcnt2:   Free pages in the file according to the file header.
+# free_percent:  Percentage of file consumed by free pages (calculated).
+# free_percent2: Percentage of file consumed by free pages (header).
+# ntable:        Number of tables in the db.
+# nindex: Number of indices in the db. +# nautoindex: Number of indices created automatically. +# nmanindex: Number of indices created manually. +# user_payload: Number of bytes of payload in table btrees +# (not including sqlite_schema) +# user_percent: $user_payload as a percentage of total file size. + +### The following, setting $file_bytes based on the actual size of the file +### on disk, causes this tool to choke on zipvfs databases. So set it based +### on the return of [PRAGMA page_count] instead. +if 0 { + set file_bytes [file size $file_to_analyze] + set file_pgcnt [expr {$file_bytes/$pageSize}] +} +set file_pgcnt [db one {PRAGMA page_count}] +set file_bytes [expr {$file_pgcnt * $pageSize}] + +set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize] +set av_percent [percent $av_pgcnt $file_pgcnt] + +set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used} +set inuse_pgcnt [expr {wide([mem eval $sql])}] +set inuse_percent [percent $inuse_pgcnt $file_pgcnt] + +set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}] +set free_percent [percent $free_pgcnt $file_pgcnt] +set free_pgcnt2 [db one {PRAGMA freelist_count}] +set free_percent2 [percent $free_pgcnt2 $file_pgcnt] + +set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}] + +# Account for the lockbyte page +if {$file_pgcnt2*$pageSize>1073742335} {incr file_pgcnt2} + +set ntable [db eval {SELECT count(*)+1 FROM sqlite_schema WHERE type='table'}] +set nindex [db eval {SELECT count(*) FROM sqlite_schema WHERE type='index'}] +set sql {SELECT count(*) FROM sqlite_schema WHERE name LIKE 'sqlite_autoindex%'} +set nautoindex [db eval $sql] +set nmanindex [expr {$nindex-$nautoindex}] + +# set total_payload [mem eval "SELECT sum(payload) FROM space_used"] +set user_payload [mem one {SELECT int(sum(payload)) FROM space_used + WHERE NOT is_index AND name NOT LIKE 'sqlite_schema'}] +set user_percent [percent $user_payload $file_bytes] + +# Output the summary statistics calculated above. 
+# +puts "/** Disk-Space Utilization Report For $root_filename" +puts "" +statline {Page size in bytes} $pageSize +statline {Pages in the whole file (measured)} $file_pgcnt +statline {Pages in the whole file (calculated)} $file_pgcnt2 +statline {Pages that store data} $inuse_pgcnt $inuse_percent +statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2 +statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent +statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent +statline {Number of tables in the database} $ntable +statline {Number of indices} $nindex +statline {Number of defined indices} $nmanindex +statline {Number of implied indices} $nautoindex +if {$isCompressed} { + statline {Size of uncompressed content in bytes} $file_bytes + set efficiency [percent $true_file_size $file_bytes] + statline {Size of compressed file on disk} $true_file_size $efficiency +} else { + statline {Size of the file in bytes} $file_bytes +} +statline {Bytes of user payload stored} $user_payload $user_percent + +# Output table rankings +# +puts "" +titleline "Page counts for all tables with their indices" +puts "" +mem eval {SELECT tblname, count(*) AS cnt, + int(sum(int_pages+leaf_pages+ovfl_pages)) AS size + FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} { + statline [string toupper $tblname] $size [percent $size $file_pgcnt] +} +puts "" +titleline "Page counts for all tables and indices separately" +puts "" +mem eval { + SELECT + upper(name) AS nm, + int(int_pages+leaf_pages+ovfl_pages) AS size + FROM space_used + ORDER BY size+0 DESC, name} {} { + statline $nm $size [percent $size $file_pgcnt] +} +if {$isCompressed} { + puts "" + titleline "Bytes of disk space used after compression" + puts "" + set csum 0 + mem eval {SELECT tblname, + int(sum(compressed_size)) + + $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages) + AS csize + FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} { + incr csum $csize + 
statline [string toupper $tblname] $csize [percent $csize $true_file_size] + } + set overhead [expr {$true_file_size - $csum}] + if {$overhead>0} { + statline {Header and free space} $overhead [percent $overhead $true_file_size] + } +} + +# Output subreports +# +if {$nindex>0} { + subreport {All tables and indices} 1 0 +} +subreport {All tables} {NOT is_index} 0 +if {$nindex>0} { + subreport {All indices} {is_index} 0 +} +foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used + ORDER BY name}] { + set qn [quote $tbl] + set name [string toupper $tbl] + set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}] + if {$n>1} { + set idxlist [mem eval "SELECT name FROM space_used + WHERE tblname='$qn' AND is_index + ORDER BY 1"] + subreport "Table $name and all its indices" "tblname='$qn'" 0 + subreport "Table $name w/o any indices" "name='$qn'" 1 + if {[llength $idxlist]>1} { + subreport "Indices of table $name" "tblname='$qn' AND is_index" 0 + } + foreach idx $idxlist { + set qidx [quote $idx] + subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1 + } + } else { + subreport "Table $name" "name='$qn'" 1 + } +} + +# Output instructions on what the numbers above mean. +# +puts "" +titleline Definitions +puts { +Page size in bytes + + The number of bytes in a single page of the database file. + Usually 1024. + +Number of pages in the whole file +} +puts " The number of $pageSize-byte pages that go into forming the complete + database" +puts { +Pages that store data + + The number of pages that store data, either as primary B*Tree pages or + as overflow pages. The number at the right is the data pages divided by + the total number of pages in the file. + +Pages on the freelist + + The number of pages that are not currently in use but are reserved for + future use. The percentage at the right is the number of freelist pages + divided by the total number of pages in the file. 
+ +Pages of auto-vacuum overhead + + The number of pages that store data used by the database to facilitate + auto-vacuum. This is zero for databases that do not support auto-vacuum. + +Number of tables in the database + + The number of tables in the database, including the SQLITE_SCHEMA table + used to store schema information. + +Number of indices + + The total number of indices in the database. + +Number of defined indices + + The number of indices created using an explicit CREATE INDEX statement. + +Number of implied indices + + The number of indices used to implement PRIMARY KEY or UNIQUE constraints + on tables. + +Size of the file in bytes + + The total amount of disk space used by the entire database files. + +Bytes of user payload stored + + The total number of bytes of user payload stored in the database. The + schema information in the SQLITE_SCHEMA table is not counted when + computing this number. The percentage at the right shows the payload + divided by the total file size. + +Percentage of total database + + The amount of the complete database file that is devoted to storing + information described by this category. + +Number of entries + + The total number of B-Tree key/value pairs stored under this category. + +Bytes of storage consumed + + The total amount of disk space required to store all B-Tree entries + under this category. The is the total number of pages used times + the pages size. + +Bytes of payload + + The amount of payload stored under this category. Payload is the data + part of table entries and the key part of index entries. The percentage + at the right is the bytes of payload divided by the bytes of storage + consumed. + +Bytes of metadata + + The amount of formatting and structural information stored in the + table or index. 
Metadata includes the btree page header, the cell pointer + array, the size field for each cell, the left child pointer or non-leaf + cells, the overflow pointers for overflow cells, and the rowid value for + rowid table cells. In other words, metadata is everything that is neither + unused space nor content. The record header in the payload is counted as + content, not metadata. + +Average payload per entry + + The average amount of payload on each entry. This is just the bytes of + payload divided by the number of entries. + +Average unused bytes per entry + + The average amount of free space remaining on all pages under this + category on a per-entry basis. This is the number of unused bytes on + all pages divided by the number of entries. + +Non-sequential pages + + The number of pages in the table or index that are out of sequence. + Many filesystems are optimized for sequential file access so a small + number of non-sequential pages might result in faster queries, + especially for larger database files that do not fit in the disk cache. + Note that after running VACUUM, the root page of each table or index is + at the beginning of the database file and all other pages are in a + separate part of the database file, resulting in a single non- + sequential page. + +Maximum payload per entry + + The largest payload size of any entry. + +Entries that use overflow + + The number of entries that user one or more overflow pages. + +Total pages used + + This is the number of pages used to hold all information in the current + category. This is the sum of index, primary, and overflow pages. + +Index pages used + + This is the number of pages in a table B-tree that hold only key (rowid) + information and no data. + +Primary pages used + + This is the number of B-tree pages that hold both key and data. + +Overflow pages used + + The total number of overflow pages used for this category. 
+ +Unused bytes on index pages + + The total number of bytes of unused space on all index pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on index pages. + +Unused bytes on primary pages + + The total number of bytes of unused space on all primary pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on primary pages. + +Unused bytes on overflow pages + + The total number of bytes of unused space on all overflow pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on overflow pages. + +Unused bytes on all pages + + The total number of bytes of unused space on all primary and overflow + pages. The percentage at the right is the number of unused bytes + divided by the total number of bytes. +} + +# Output a dump of the in-memory database. This can be used for more +# complex offline analysis. +# +titleline {} +puts "The entire text of this report can be sourced into any SQL database" +puts "engine for further analysis. All of the text above is an SQL comment." 
+puts "The data used to generate this report follows:" +puts "*/" +puts "BEGIN;" +puts $tabledef +unset -nocomplain x +mem eval {SELECT * FROM space_used} x { + puts -nonewline "INSERT INTO space_used VALUES" + set sep ( + foreach col $x(*) { + set v $x($col) + if {$v=="" || ![string is double $v]} {set v '[quote $v]'} + puts -nonewline $sep$v + set sep , + } + puts ");" +} +puts "COMMIT;" + +} err]} { + puts "ERROR: $err" + puts $errorInfo + exit 1 +} diff --git a/bin/tic b/bin/tic new file mode 100644 index 0000000000000000000000000000000000000000..4edfa66a0fa5205392cdd31bab1863e9440ef04d Binary files /dev/null and b/bin/tic differ diff --git a/bin/tiffcomment b/bin/tiffcomment new file mode 100644 index 0000000000000000000000000000000000000000..c7eac5d91c54bdae70802a6a3540fc2573fad27b --- /dev/null +++ b/bin/tiffcomment @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from tifffile.tiffcomment import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/tiffcp b/bin/tiffcp new file mode 100644 index 0000000000000000000000000000000000000000..1f0e34b46c6d8b00b17a03c7bc2d6f79c5179918 Binary files /dev/null and b/bin/tiffcp differ diff --git a/bin/tiffdither b/bin/tiffdither new file mode 100644 index 0000000000000000000000000000000000000000..2c114846af042d1538206bbe7c1816bbea7f686a Binary files /dev/null and b/bin/tiffdither differ diff --git a/bin/tiffdump b/bin/tiffdump new file mode 100644 index 0000000000000000000000000000000000000000..0b13beebd0004c63118bd4d9845479d61ab65e4b Binary files /dev/null and b/bin/tiffdump differ diff --git a/bin/tifffile b/bin/tifffile new file mode 100644 index 0000000000000000000000000000000000000000..795a60f7b571f0e8f28323d1bf413e4a029f952c --- /dev/null +++ b/bin/tifffile @@ -0,0 +1,8 @@ 
+#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from tifffile import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/tiffinfo b/bin/tiffinfo new file mode 100644 index 0000000000000000000000000000000000000000..de9f95ace9944f6c6feac86c9591a9b0b766e5d0 Binary files /dev/null and b/bin/tiffinfo differ diff --git a/bin/tiffmedian b/bin/tiffmedian new file mode 100644 index 0000000000000000000000000000000000000000..a4f86928dcb97f06c36204b3f3b7882600260335 Binary files /dev/null and b/bin/tiffmedian differ diff --git a/bin/tiffset b/bin/tiffset new file mode 100644 index 0000000000000000000000000000000000000000..333de9cdbbffc7bea9bb3c8b0e97d3f21bf0590f Binary files /dev/null and b/bin/tiffset differ diff --git a/bin/tiffsplit b/bin/tiffsplit new file mode 100644 index 0000000000000000000000000000000000000000..7b5f87e56651dbfe09bf6efbbf6227654f5b03c8 Binary files /dev/null and b/bin/tiffsplit differ diff --git a/bin/tificc b/bin/tificc new file mode 100644 index 0000000000000000000000000000000000000000..ffbea5eaa4ab99b96fbee9f5cac2db63388b6108 Binary files /dev/null and b/bin/tificc differ diff --git a/bin/tjbench b/bin/tjbench new file mode 100644 index 0000000000000000000000000000000000000000..6c9edf7de9b9d07cf7f2c38d31fcec4166431619 Binary files /dev/null and b/bin/tjbench differ diff --git a/bin/toe b/bin/toe new file mode 100644 index 0000000000000000000000000000000000000000..920ede6533a5209593e07c728694a4444440dd7a Binary files /dev/null and b/bin/toe differ diff --git a/bin/torchfrtrace b/bin/torchfrtrace new file mode 100644 index 0000000000000000000000000000000000000000..14ed8fa3406b858f0f05e7b38cc45ae345dcf458 --- /dev/null +++ b/bin/torchfrtrace @@ -0,0 +1,33 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python +# EASY-INSTALL-ENTRY-SCRIPT: 
'torch==2.5.1','console_scripts','torchfrtrace' +import re +import sys + +# for compatibility with easy_install; see #2198 +__requires__ = 'torch==2.5.1' + +try: + from importlib.metadata import distribution +except ImportError: + try: + from importlib_metadata import distribution + except ImportError: + from pkg_resources import load_entry_point + + +def importlib_load_entry_point(spec, group, name): + dist_name, _, _ = spec.partition('==') + matches = ( + entry_point + for entry_point in distribution(dist_name).entry_points + if entry_point.group == group and entry_point.name == name + ) + return next(matches).load() + + +globals().setdefault('load_entry_point', importlib_load_entry_point) + + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'torchfrtrace')()) diff --git a/bin/torchrun b/bin/torchrun new file mode 100644 index 0000000000000000000000000000000000000000..5a07d114dfd395e58f7e9cc4772854a4a473fa0b --- /dev/null +++ b/bin/torchrun @@ -0,0 +1,33 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python +# EASY-INSTALL-ENTRY-SCRIPT: 'torch==2.5.1','console_scripts','torchrun' +import re +import sys + +# for compatibility with easy_install; see #2198 +__requires__ = 'torch==2.5.1' + +try: + from importlib.metadata import distribution +except ImportError: + try: + from importlib_metadata import distribution + except ImportError: + from pkg_resources import load_entry_point + + +def importlib_load_entry_point(spec, group, name): + dist_name, _, _ = spec.partition('==') + matches = ( + entry_point + for entry_point in distribution(dist_name).entry_points + if entry_point.group == group and entry_point.name == name + ) + return next(matches).load() + + +globals().setdefault('load_entry_point', importlib_load_entry_point) + + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + 
sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'torchrun')()) diff --git a/bin/tput b/bin/tput new file mode 100644 index 0000000000000000000000000000000000000000..f288a415499d70e08db52bb19005d3f8150ce694 Binary files /dev/null and b/bin/tput differ diff --git a/bin/tqdm b/bin/tqdm new file mode 100644 index 0000000000000000000000000000000000000000..87771d70e8580be06fdbba5337691ddf0d7adaa2 --- /dev/null +++ b/bin/tqdm @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from tqdm.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/transicc b/bin/transicc new file mode 100644 index 0000000000000000000000000000000000000000..f7d623344858056ee2477a95e6d6f3ddb4300353 Binary files /dev/null and b/bin/transicc differ diff --git a/bin/tset b/bin/tset new file mode 100644 index 0000000000000000000000000000000000000000..7bd1d42789af918f9142590b2bdd5003cbbbedc6 Binary files /dev/null and b/bin/tset differ diff --git a/bin/ttx b/bin/ttx new file mode 100644 index 0000000000000000000000000000000000000000..155881f92dfafa750e8bf14c4ad245193366326f --- /dev/null +++ b/bin/ttx @@ -0,0 +1,8 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from fontTools.ttx import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/undill b/bin/undill new file mode 100644 index 0000000000000000000000000000000000000000..23e7ab7a39bd066fc5bf7fa0da06321bbc7a1ca5 --- /dev/null +++ b/bin/undill @@ -0,0 +1,22 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python3.10 +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Copyright (c) 2008-2016 California Institute of Technology. 
+# Copyright (c) 2016-2025 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +""" +unpickle the contents of a pickled object file + +Examples:: + + $ undill hello.pkl + ['hello', 'world'] +""" + +if __name__ == '__main__': + import sys + import dill + for file in sys.argv[1:]: + print (dill.load(open(file,'rb'))) + diff --git a/bin/webpinfo b/bin/webpinfo new file mode 100644 index 0000000000000000000000000000000000000000..05ad102e3661c619fc2b5172945681411779b24b Binary files /dev/null and b/bin/webpinfo differ diff --git a/bin/webpmux b/bin/webpmux new file mode 100644 index 0000000000000000000000000000000000000000..b6ff9d48baf1f071149bd419b629f3c97f6462df Binary files /dev/null and b/bin/webpmux differ diff --git a/bin/wheel b/bin/wheel new file mode 100644 index 0000000000000000000000000000000000000000..4eb5452c94391939e59990b316d3fe3b68e055c5 --- /dev/null +++ b/bin/wheel @@ -0,0 +1,11 @@ +#!/home/aioz-nghiale/anaconda3/envs/testing_softzoo_pointe/bin/python + +# -*- coding: utf-8 -*- +import re +import sys + +from wheel.cli import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/bin/wish b/bin/wish new file mode 100644 index 0000000000000000000000000000000000000000..87a6cee73f811480369ccae55a8de5adbea1d287 Binary files /dev/null and b/bin/wish differ diff --git a/bin/wish8.6 b/bin/wish8.6 new file mode 100644 index 0000000000000000000000000000000000000000..87a6cee73f811480369ccae55a8de5adbea1d287 Binary files /dev/null and b/bin/wish8.6 differ diff --git a/bin/wrjpgcom b/bin/wrjpgcom new file mode 100644 index 0000000000000000000000000000000000000000..fa7923a94ada0c323713409440b8f408be8bae7f Binary files /dev/null and b/bin/wrjpgcom differ diff --git a/bin/xmlwf b/bin/xmlwf new file mode 100644 index 
0000000000000000000000000000000000000000..4d9d69353d388eecb857cb883a6113da2b4ac9e3 Binary files /dev/null and b/bin/xmlwf differ diff --git a/bin/xzcmp b/bin/xzcmp new file mode 100644 index 0000000000000000000000000000000000000000..92f84cdc529e1735e2340da23eaf7735cd8cb63f --- /dev/null +++ b/bin/xzcmp @@ -0,0 +1,220 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 1998, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='xz --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *cmp*) prog=xzcmp; cmp=${CMP:-cmp};; + *) prog=xzdiff; cmp=${DIFF:-diff};; +esac + +version="$prog (XZ Utils) 5.6.4" + +usage="Usage: ${0##*/} [OPTION]... FILE1 [FILE2] +Compare FILE1 to FILE2, using their uncompressed contents if they are +compressed. If FILE2 is omitted, then the files compared are FILE1 and +FILE1 from which the compression format suffix has been stripped. + +Do comparisons like '$cmp' does. OPTIONs are the same as for '$cmp'. + +Report bugs to ." 
+ +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' + +while :; do + case $1 in + --h*) printf '%s\n' "$usage" || exit 2; exit;; + --v*) printf '%s\n' "$version" || exit 2; exit;; + --) shift; break;; + -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | sed "$escape"`;; + -?*) cmp="$cmp '$1'";; + *) break;; + esac + shift +done +cmp="$cmp --" + +for file; do + test "X$file" = X- || <"$file" || exit 2 +done + +# xz needs -qQ to ignore warnings like unsupported check type. +xz1="$xz -qQ" +xz2="$xz -qQ" +xz_status=0 +exec 3>&1 + +if test $# -eq 1; then + case $1 in + *[-.]xz | *[-.]lzma | *[-.]lz | *.t[lx]z) + ;; + *[-.]bz2 | *.tbz | *.tbz2) + xz1=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) + xz1=gzip;; + *[-.]lzo | *.tzo) + xz1=lzop;; + *[-.]zst | *.tzst) + xz1='zstd -q';; + *[-.]lz4) + xz1=lz4;; + *) + printf '%s\n' "$0: $1: Unknown compressed file name suffix" >&2 + exit 2;; + esac + case $1 in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *[-.]lzo | *[-.]zst | *[-.]lz4) + FILE=`expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;; + *.t[abglx]z) + FILE=`expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;; + *.tbz2) + FILE=`expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;; + *.tzo) + FILE=`expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;; + *.tzst) + FILE=`expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;; + esac + xz_status=$( + exec 4>&1 + ($xz1 -cd -- "$1" 4>&-; echo $? 
>&4) 3>&- | eval "$cmp" - '"$FILE"' >&3 + ) +elif test $# -eq 2; then + case $1 in + *[-.]bz2 | *.tbz | *.tbz2) xz1=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) xz1=gzip;; + *[-.]lzo | *.tzo) xz1=lzop;; + *[-.]zst | *.tzst) xz1='zstd -q';; + *[-.]lz4) xz1=lz4;; + esac + case $2 in + *[-.]bz2 | *.tbz | *.tbz2) xz2=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) xz2=gzip;; + *[-.]lzo | *.tzo) xz2=lzop;; + *[-.]zst | *.tzst) xz2='zstd -q';; + *[-.]lz4) xz2=lz4;; + esac + case $1 in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + case "$2" in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + if test "$1$2" = --; then + xz_status=$( + exec 4>&1 + ($xz1 -cdf - 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - - >&3 + ) + elif # Reject Solaris 8's buggy /bin/bash 2.03. + echo X | (echo X | eval "$cmp" /dev/fd/5 - >/dev/null 2>&1) 5<&0; then + # NOTE: xz_status will contain two numbers. + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + ( ($xz2 -cdf -- "$2" 4>&-; echo $? >&4) 3>&- 5<&- &3) 5<&0 + ) + else + F=`expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog + tmp= + trap ' + test -n "$tmp" && rm -rf "$tmp" + (exit 2); exit 2 + ' HUP INT PIPE TERM 0 + if type mktemp >/dev/null 2>&1; then + # Note that FreeBSD's mktemp isn't fully compatible with + # the implementations from mktemp.org and GNU coreutils. + # It is important that the -t argument is the last argument + # and that no "--" is used between -t and the template argument. + # This way this command works on all implementations. + tmp=`mktemp -d -t "$prog.XXXXXXXXXX"` || exit 2 + else + # Fallback code if mktemp is missing. This isn't as + # robust as using mktemp since this doesn't try with + # different file names in case of a file name conflict. 
+ # + # There's no need to save the original umask since + # we don't create any non-temp files. Note that using + # mkdir -m 0077 isn't secure since some mkdir implementations + # create the dir with the default umask and chmod the + # the dir afterwards. + umask 0077 + mkdir -- "${TMPDIR-/tmp}/$prog.$$" || exit 2 + tmp="${TMPDIR-/tmp}/$prog.$$" + fi + $xz2 -cdf -- "$2" > "$tmp/$F" || exit 2 + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - '"$tmp/$F"' >&3 + ) + cmp_status=$? + rm -rf "$tmp" || xz_status=$? + trap - HUP INT PIPE TERM 0 + (exit $cmp_status) + fi;; + *) + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - '"$2"' >&3 + );; + esac;; + *) + case "$2" in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + xz_status=$( + exec 4>&1 + ($xz2 -cdf -- "$2" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" '"$1"' - >&3 + );; + *) + eval "$cmp" '"$1"' '"$2"';; + esac;; + esac +else + printf '%s\n' "$0: Invalid number of operands; try '${0##*/} --help' for help" >&2 + exit 2 +fi + +cmp_status=$? +for num in $xz_status ; do + # 0 from decompressor means successful decompression. SIGPIPE from + # decompressor is possible when diff or cmp exits before the whole file + # has been decompressed. In that case we want to retain the exit status + # from diff or cmp. Note that using "trap '' PIPE" is not possible + # because gzip changes its behavior (including exit status) if SIGPIPE + # is ignored. 
+ test "$num" -eq 0 && continue + test "$num" -ge 128 \ + && test "$(kill -l "$num" 2> /dev/null)" = "PIPE" \ + && continue + exit 2 +done +exit $cmp_status diff --git a/bin/xzdec b/bin/xzdec new file mode 100644 index 0000000000000000000000000000000000000000..dc8a7f484820616fce6f2a6f74bf94423b91b15c Binary files /dev/null and b/bin/xzdec differ diff --git a/bin/xzdiff b/bin/xzdiff new file mode 100644 index 0000000000000000000000000000000000000000..92f84cdc529e1735e2340da23eaf7735cd8cb63f --- /dev/null +++ b/bin/xzdiff @@ -0,0 +1,220 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 1998, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='xz --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *cmp*) prog=xzcmp; cmp=${CMP:-cmp};; + *) prog=xzdiff; cmp=${DIFF:-diff};; +esac + +version="$prog (XZ Utils) 5.6.4" + +usage="Usage: ${0##*/} [OPTION]... FILE1 [FILE2] +Compare FILE1 to FILE2, using their uncompressed contents if they are +compressed. 
If FILE2 is omitted, then the files compared are FILE1 and +FILE1 from which the compression format suffix has been stripped. + +Do comparisons like '$cmp' does. OPTIONs are the same as for '$cmp'. + +Report bugs to ." + +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' + +while :; do + case $1 in + --h*) printf '%s\n' "$usage" || exit 2; exit;; + --v*) printf '%s\n' "$version" || exit 2; exit;; + --) shift; break;; + -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | sed "$escape"`;; + -?*) cmp="$cmp '$1'";; + *) break;; + esac + shift +done +cmp="$cmp --" + +for file; do + test "X$file" = X- || <"$file" || exit 2 +done + +# xz needs -qQ to ignore warnings like unsupported check type. +xz1="$xz -qQ" +xz2="$xz -qQ" +xz_status=0 +exec 3>&1 + +if test $# -eq 1; then + case $1 in + *[-.]xz | *[-.]lzma | *[-.]lz | *.t[lx]z) + ;; + *[-.]bz2 | *.tbz | *.tbz2) + xz1=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) + xz1=gzip;; + *[-.]lzo | *.tzo) + xz1=lzop;; + *[-.]zst | *.tzst) + xz1='zstd -q';; + *[-.]lz4) + xz1=lz4;; + *) + printf '%s\n' "$0: $1: Unknown compressed file name suffix" >&2 + exit 2;; + esac + case $1 in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *[-.]lzo | *[-.]zst | *[-.]lz4) + FILE=`expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;; + *.t[abglx]z) + FILE=`expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;; + *.tbz2) + FILE=`expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;; + *.tzo) + FILE=`expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;; + *.tzst) + FILE=`expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;; + esac + xz_status=$( + exec 4>&1 + ($xz1 -cd -- "$1" 4>&-; echo $? 
>&4) 3>&- | eval "$cmp" - '"$FILE"' >&3 + ) +elif test $# -eq 2; then + case $1 in + *[-.]bz2 | *.tbz | *.tbz2) xz1=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) xz1=gzip;; + *[-.]lzo | *.tzo) xz1=lzop;; + *[-.]zst | *.tzst) xz1='zstd -q';; + *[-.]lz4) xz1=lz4;; + esac + case $2 in + *[-.]bz2 | *.tbz | *.tbz2) xz2=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) xz2=gzip;; + *[-.]lzo | *.tzo) xz2=lzop;; + *[-.]zst | *.tzst) xz2='zstd -q';; + *[-.]lz4) xz2=lz4;; + esac + case $1 in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + case "$2" in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + if test "$1$2" = --; then + xz_status=$( + exec 4>&1 + ($xz1 -cdf - 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - - >&3 + ) + elif # Reject Solaris 8's buggy /bin/bash 2.03. + echo X | (echo X | eval "$cmp" /dev/fd/5 - >/dev/null 2>&1) 5<&0; then + # NOTE: xz_status will contain two numbers. + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + ( ($xz2 -cdf -- "$2" 4>&-; echo $? >&4) 3>&- 5<&- &3) 5<&0 + ) + else + F=`expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog + tmp= + trap ' + test -n "$tmp" && rm -rf "$tmp" + (exit 2); exit 2 + ' HUP INT PIPE TERM 0 + if type mktemp >/dev/null 2>&1; then + # Note that FreeBSD's mktemp isn't fully compatible with + # the implementations from mktemp.org and GNU coreutils. + # It is important that the -t argument is the last argument + # and that no "--" is used between -t and the template argument. + # This way this command works on all implementations. + tmp=`mktemp -d -t "$prog.XXXXXXXXXX"` || exit 2 + else + # Fallback code if mktemp is missing. This isn't as + # robust as using mktemp since this doesn't try with + # different file names in case of a file name conflict. 
+ # + # There's no need to save the original umask since + # we don't create any non-temp files. Note that using + # mkdir -m 0077 isn't secure since some mkdir implementations + # create the dir with the default umask and chmod the + # the dir afterwards. + umask 0077 + mkdir -- "${TMPDIR-/tmp}/$prog.$$" || exit 2 + tmp="${TMPDIR-/tmp}/$prog.$$" + fi + $xz2 -cdf -- "$2" > "$tmp/$F" || exit 2 + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - '"$tmp/$F"' >&3 + ) + cmp_status=$? + rm -rf "$tmp" || xz_status=$? + trap - HUP INT PIPE TERM 0 + (exit $cmp_status) + fi;; + *) + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - '"$2"' >&3 + );; + esac;; + *) + case "$2" in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + xz_status=$( + exec 4>&1 + ($xz2 -cdf -- "$2" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" '"$1"' - >&3 + );; + *) + eval "$cmp" '"$1"' '"$2"';; + esac;; + esac +else + printf '%s\n' "$0: Invalid number of operands; try '${0##*/} --help' for help" >&2 + exit 2 +fi + +cmp_status=$? +for num in $xz_status ; do + # 0 from decompressor means successful decompression. SIGPIPE from + # decompressor is possible when diff or cmp exits before the whole file + # has been decompressed. In that case we want to retain the exit status + # from diff or cmp. Note that using "trap '' PIPE" is not possible + # because gzip changes its behavior (including exit status) if SIGPIPE + # is ignored. 
+ test "$num" -eq 0 && continue + test "$num" -ge 128 \ + && test "$(kill -l "$num" 2> /dev/null)" = "PIPE" \ + && continue + exit 2 +done +exit $cmp_status diff --git a/bin/xzegrep b/bin/xzegrep new file mode 100644 index 0000000000000000000000000000000000000000..f2a73bb9297a231da8823435f9d8bde68787b63a --- /dev/null +++ b/bin/xzegrep @@ -0,0 +1,300 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# xzgrep -- a wrapper around a grep program that decompresses files as needed +# Adapted from a version sent by Charles Levert + +# Copyright (C) 1998, 2001, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='xz --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *egrep*) prog=xzegrep; grep=${GREP:-grep -E};; + *fgrep*) prog=xzfgrep; grep=${GREP:-grep -F};; + *) prog=xzgrep; grep=${GREP:-grep};; +esac + +version="$prog (XZ Utils) 5.6.4" + +usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]... +Look for instances of PATTERN in the input FILEs, using their +uncompressed contents if they are compressed. 
+ +OPTIONs are the same as for '$grep'. + +Report bugs to ." + +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' +operands= +have_pat=0 +files_with_matches=0 +files_without_matches=0 +no_filename=0 +with_filename=0 + +# See if -H and --label options are supported (GNU and *BSDs). +if test f:x = "$(eval "echo x | $grep -H --label=f x 2> /dev/null")"; then + grep_supports_label=1 +else + grep_supports_label=0 +fi + +while test $# -ne 0; do + option=$1 + shift + optarg= + + case $option in + (-[0123456789abcdEFGhHiIKlLnoPqrRsTuUvVwxyzZ]*[!0123456789]*) + # Something like -Fiv was specified, that is, $option contains more + # than one option of which the first option (in this example -F) + # doesn't take an argument. Split the first option into a standalone + # argument and continue parsing the rest of the options (in this example, + # replace -Fiv with -iv in the argument list and set option=-F). + # + # If there are digits [0-9] they are treated as if they were a single + # option character because this syntax is an alias for -C for GNU grep. + # For example, "grep -25F" is equivalent to "grep -C25 -F". If only + # digits are specified like "grep -25" we don't get here because the + # above pattern in the case-statement doesn't match such strings. + arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' | + LC_ALL=C sed "$escape") + eval "set -- $arg2 "'${1+"$@"}' + option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');; + (--binary-*=* | --[lm]a*=* | --reg*=*) + # These options require an argument and an argument has been provided + # with the --foo=argument syntax. All is good. + ;; + (-[ABCDefmX] | --binary-* | --file | --[lm]a* | --reg*) + # These options require an argument which should now be in $1. + # If it isn't, display an error and exit. 
+ case ${1?"$option option requires an argument"} in + (*\'*) + optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + optarg=" '$1'";; + esac + shift;; + (--) + break;; + (-?*) + ;; + (*) + case $option in + (*\'*) + operands="$operands '"$(printf '%sX\n' "$option" | + LC_ALL=C sed "$escape");; + (*) + operands="$operands '$option'";; + esac + ${POSIXLY_CORRECT+break} + continue;; + esac + + case $option in + (-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*) + printf >&2 '%s: %s: Option not supported\n' "$0" "$option" + exit 2;; + (-[ef]* | --file | --file=* | --reg*) + have_pat=1;; + (--h | --he | --hel | --help) + printf '%s\n' "$usage" || exit 2 + exit;; + (-H | --wi | --wit | --with | --with- | --with-f | --with-fi \ + | --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \ + | --with-filename) + with_filename=1 + continue;; + (-l | --files-with-*) + files_with_matches=1 + continue;; + (-L | --files-witho*) + files_without_matches=1 + continue;; + (-h | --no-f*) + no_filename=1;; + (-V | --v | --ve | --ver | --vers | --versi | --versio | --version) + printf '%s\n' "$version" || exit 2 + exit;; + esac + + case $option in + (*\'?*) + option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");; + (*) + option="'$option'";; + esac + + grep="$grep $option$optarg" +done + +eval "set -- $operands "'${1+"$@"}' + +if test $have_pat -eq 0; then + case ${1?"Missing pattern; try '${0##*/} --help' for help"} in + (*\'*) + grep="$grep -e '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + grep="$grep -e '$1'";; + esac + shift +fi + +if test $# -eq 0; then + set -- - +fi + +exec 3>&1 + +# res=1 means that no file matched yet +res=1 + +for i; do + case $i in + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) uncompress="gzip -cdf";; + *[-.]bz2 | *[-.]tbz | *.tbz2) uncompress="bzip2 -cdf";; + *[-.]lzo | *[-.]tzo) uncompress="lzop -cdf";; + *[-.]zst | *[-.]tzst) uncompress="zstd -cdfq";; # zstd needs -q. 
+ *[-.]lz4) uncompress="lz4 -cdf";; + *) uncompress="$xz -cdfqQ";; # -qQ to ignore warnings like unsupp. check. + esac + # xz_status will hold the decompressor's exit status. + # Exit status of grep (and in rare cases, printf or sed) is + # available as the exit status of this assignment command. + xz_status=$( + exec 5>&1 + ($uncompress -- "$i" 5>&-; echo $? >&5) 3>&- | + if test $files_with_matches -eq 1; then + eval "$grep -q" && { printf '%s\n' "$i" || exit 2; } + elif test $files_without_matches -eq 1; then + eval "$grep -q" || { + r=$? + if test $r -eq 1; then + printf '%s\n' "$i" || r=2 + fi + exit $r + } + elif test $with_filename -eq 0 && + { test $# -eq 1 || test $no_filename -eq 1; }; then + eval "$grep" + elif test $grep_supports_label -eq 1; then + # The grep implementation in use allows us to specify the filename + # that grep will prefix to the output lines. This is faster and + # less prone to security bugs than the fallback method that uses sed. + # This also avoids confusing output with GNU grep >= 3.5 (2020-09-27) + # which prints "binary file matches" to stderr instead of stdout. + # + # If reading from stdin, let grep use whatever name it prefers for + # stdin. With GNU grep it is a locale-specific translated string. + if test "x$i" = "x-"; then + eval "$grep -H" + else + eval "$grep -H --label \"\$i\"" + fi + else + # Append a colon so that the last character will never be a newline + # which would otherwise get lost in shell command substitution. + i="$i:" + + # Escape & \ | and newlines only if such characters are present + # (speed optimization). + case $i in + (*' +'* | *'&'* | *'\'* | *'|'*) + # If sed fails, set i to a known safe string to ensure that + # failing sed did not create a half-escaped dangerous string. + i=$(printf '%s\n' "$i" | LC_ALL=C sed 's/[&\|]/\\&/g; $!s/$/\\/') || + i='(unknown filename):';; + esac + + # $i already ends with a colon so do not add it here. 
+ sed_script="s|^|$i|" + + # If grep or sed fails, pick the larger value of the two exit statuses. + # If sed fails, use at least 2 since we use >= 2 to indicate errors. + r=$( + exec 4>&1 + (eval "$grep" 4>&-; echo $? >&4) 3>&- | + LC_ALL=C sed "$sed_script" >&3 4>&- + ) || { + sed_status=$? + test "$sed_status" -lt 2 && sed_status=2 + test "$r" -lt "$sed_status" && r=$sed_status + } + exit $r + fi >&3 5>&- + ) + r=$? + + # If grep or sed or other non-decompression command failed with a signal, + # exit immediately and ignore the possible remaining files. + # + # NOTE: Instead of 128 + signal_number, some shells use + # 256 + signal_number (ksh) or 384 + signal_number (yash). + # This is fine for us since their "exit" and "kill -l" commands take + # this into account. (At least the versions I tried do but there is + # a report of an old ksh variant whose "exit" truncates the exit status + # to 8 bits without any special handling for values indicating a signal.) + test "$r" -ge 128 && exit "$r" + + if test -z "$xz_status"; then + # Something unusual happened, for example, we got a signal and + # the exit status of the decompressor was never echoed and thus + # $xz_status is empty. Exit immediately and ignore the possible + # remaining files. + exit 2 + elif test "$xz_status" -ge 128; then + # The decompressor died due to a signal. SIGPIPE is ignored since it can + # occur if grep exits before the whole file has been decompressed (grep -q + # can do that). If the decompressor died with some other signal, exit + # immediately and ignore the possible remaining files. + test "$(kill -l "$xz_status" 2> /dev/null)" != "PIPE" && exit "$xz_status" + elif test "$xz_status" -gt 0; then + # Decompression failed but we will continue with the remaining + # files anyway. Set exit status to at least 2 to indicate an error. 
+ test "$r" -lt 2 && r=2 + fi + + # Since res=1 is the initial value, we only need to care about + # matches (r == 0) and errors (r >= 2) here; r == 1 can be ignored. + if test "$r" -ge 2; then + # An error occurred in decompressor, grep, or some other command. Update + # res unless a larger error code has been seen with an earlier file. + test "$res" -lt "$r" && res=$r + elif test "$r" -eq 0; then + # grep found a match and no errors occurred. Update res if no errors have + # occurred with earlier files. + test "$res" -eq 1 && res=0 + fi +done + +# 0: At least one file matched and no errors occurred. +# 1: No matches were found and no errors occurred. +# >=2: Error. It's unknown if matches were found. +exit "$res" diff --git a/bin/xzfgrep b/bin/xzfgrep new file mode 100644 index 0000000000000000000000000000000000000000..f2a73bb9297a231da8823435f9d8bde68787b63a --- /dev/null +++ b/bin/xzfgrep @@ -0,0 +1,300 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# xzgrep -- a wrapper around a grep program that decompresses files as needed +# Adapted from a version sent by Charles Levert + +# Copyright (C) 1998, 2001, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. 
This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='xz --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *egrep*) prog=xzegrep; grep=${GREP:-grep -E};; + *fgrep*) prog=xzfgrep; grep=${GREP:-grep -F};; + *) prog=xzgrep; grep=${GREP:-grep};; +esac + +version="$prog (XZ Utils) 5.6.4" + +usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]... +Look for instances of PATTERN in the input FILEs, using their +uncompressed contents if they are compressed. + +OPTIONs are the same as for '$grep'. + +Report bugs to ." + +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' +operands= +have_pat=0 +files_with_matches=0 +files_without_matches=0 +no_filename=0 +with_filename=0 + +# See if -H and --label options are supported (GNU and *BSDs). +if test f:x = "$(eval "echo x | $grep -H --label=f x 2> /dev/null")"; then + grep_supports_label=1 +else + grep_supports_label=0 +fi + +while test $# -ne 0; do + option=$1 + shift + optarg= + + case $option in + (-[0123456789abcdEFGhHiIKlLnoPqrRsTuUvVwxyzZ]*[!0123456789]*) + # Something like -Fiv was specified, that is, $option contains more + # than one option of which the first option (in this example -F) + # doesn't take an argument. Split the first option into a standalone + # argument and continue parsing the rest of the options (in this example, + # replace -Fiv with -iv in the argument list and set option=-F). + # + # If there are digits [0-9] they are treated as if they were a single + # option character because this syntax is an alias for -C for GNU grep. + # For example, "grep -25F" is equivalent to "grep -C25 -F". If only + # digits are specified like "grep -25" we don't get here because the + # above pattern in the case-statement doesn't match such strings. 
+ arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' | + LC_ALL=C sed "$escape") + eval "set -- $arg2 "'${1+"$@"}' + option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');; + (--binary-*=* | --[lm]a*=* | --reg*=*) + # These options require an argument and an argument has been provided + # with the --foo=argument syntax. All is good. + ;; + (-[ABCDefmX] | --binary-* | --file | --[lm]a* | --reg*) + # These options require an argument which should now be in $1. + # If it isn't, display an error and exit. + case ${1?"$option option requires an argument"} in + (*\'*) + optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + optarg=" '$1'";; + esac + shift;; + (--) + break;; + (-?*) + ;; + (*) + case $option in + (*\'*) + operands="$operands '"$(printf '%sX\n' "$option" | + LC_ALL=C sed "$escape");; + (*) + operands="$operands '$option'";; + esac + ${POSIXLY_CORRECT+break} + continue;; + esac + + case $option in + (-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*) + printf >&2 '%s: %s: Option not supported\n' "$0" "$option" + exit 2;; + (-[ef]* | --file | --file=* | --reg*) + have_pat=1;; + (--h | --he | --hel | --help) + printf '%s\n' "$usage" || exit 2 + exit;; + (-H | --wi | --wit | --with | --with- | --with-f | --with-fi \ + | --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \ + | --with-filename) + with_filename=1 + continue;; + (-l | --files-with-*) + files_with_matches=1 + continue;; + (-L | --files-witho*) + files_without_matches=1 + continue;; + (-h | --no-f*) + no_filename=1;; + (-V | --v | --ve | --ver | --vers | --versi | --versio | --version) + printf '%s\n' "$version" || exit 2 + exit;; + esac + + case $option in + (*\'?*) + option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");; + (*) + option="'$option'";; + esac + + grep="$grep $option$optarg" +done + +eval "set -- $operands "'${1+"$@"}' + +if test $have_pat -eq 0; then + case ${1?"Missing pattern; try '${0##*/} --help' for help"} in + (*\'*) + 
grep="$grep -e '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + grep="$grep -e '$1'";; + esac + shift +fi + +if test $# -eq 0; then + set -- - +fi + +exec 3>&1 + +# res=1 means that no file matched yet +res=1 + +for i; do + case $i in + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) uncompress="gzip -cdf";; + *[-.]bz2 | *[-.]tbz | *.tbz2) uncompress="bzip2 -cdf";; + *[-.]lzo | *[-.]tzo) uncompress="lzop -cdf";; + *[-.]zst | *[-.]tzst) uncompress="zstd -cdfq";; # zstd needs -q. + *[-.]lz4) uncompress="lz4 -cdf";; + *) uncompress="$xz -cdfqQ";; # -qQ to ignore warnings like unsupp. check. + esac + # xz_status will hold the decompressor's exit status. + # Exit status of grep (and in rare cases, printf or sed) is + # available as the exit status of this assignment command. + xz_status=$( + exec 5>&1 + ($uncompress -- "$i" 5>&-; echo $? >&5) 3>&- | + if test $files_with_matches -eq 1; then + eval "$grep -q" && { printf '%s\n' "$i" || exit 2; } + elif test $files_without_matches -eq 1; then + eval "$grep -q" || { + r=$? + if test $r -eq 1; then + printf '%s\n' "$i" || r=2 + fi + exit $r + } + elif test $with_filename -eq 0 && + { test $# -eq 1 || test $no_filename -eq 1; }; then + eval "$grep" + elif test $grep_supports_label -eq 1; then + # The grep implementation in use allows us to specify the filename + # that grep will prefix to the output lines. This is faster and + # less prone to security bugs than the fallback method that uses sed. + # This also avoids confusing output with GNU grep >= 3.5 (2020-09-27) + # which prints "binary file matches" to stderr instead of stdout. + # + # If reading from stdin, let grep use whatever name it prefers for + # stdin. With GNU grep it is a locale-specific translated string. + if test "x$i" = "x-"; then + eval "$grep -H" + else + eval "$grep -H --label \"\$i\"" + fi + else + # Append a colon so that the last character will never be a newline + # which would otherwise get lost in shell command substitution. 
+ i="$i:" + + # Escape & \ | and newlines only if such characters are present + # (speed optimization). + case $i in + (*' +'* | *'&'* | *'\'* | *'|'*) + # If sed fails, set i to a known safe string to ensure that + # failing sed did not create a half-escaped dangerous string. + i=$(printf '%s\n' "$i" | LC_ALL=C sed 's/[&\|]/\\&/g; $!s/$/\\/') || + i='(unknown filename):';; + esac + + # $i already ends with a colon so do not add it here. + sed_script="s|^|$i|" + + # If grep or sed fails, pick the larger value of the two exit statuses. + # If sed fails, use at least 2 since we use >= 2 to indicate errors. + r=$( + exec 4>&1 + (eval "$grep" 4>&-; echo $? >&4) 3>&- | + LC_ALL=C sed "$sed_script" >&3 4>&- + ) || { + sed_status=$? + test "$sed_status" -lt 2 && sed_status=2 + test "$r" -lt "$sed_status" && r=$sed_status + } + exit $r + fi >&3 5>&- + ) + r=$? + + # If grep or sed or other non-decompression command failed with a signal, + # exit immediately and ignore the possible remaining files. + # + # NOTE: Instead of 128 + signal_number, some shells use + # 256 + signal_number (ksh) or 384 + signal_number (yash). + # This is fine for us since their "exit" and "kill -l" commands take + # this into account. (At least the versions I tried do but there is + # a report of an old ksh variant whose "exit" truncates the exit status + # to 8 bits without any special handling for values indicating a signal.) + test "$r" -ge 128 && exit "$r" + + if test -z "$xz_status"; then + # Something unusual happened, for example, we got a signal and + # the exit status of the decompressor was never echoed and thus + # $xz_status is empty. Exit immediately and ignore the possible + # remaining files. + exit 2 + elif test "$xz_status" -ge 128; then + # The decompressor died due to a signal. SIGPIPE is ignored since it can + # occur if grep exits before the whole file has been decompressed (grep -q + # can do that). 
If the decompressor died with some other signal, exit + # immediately and ignore the possible remaining files. + test "$(kill -l "$xz_status" 2> /dev/null)" != "PIPE" && exit "$xz_status" + elif test "$xz_status" -gt 0; then + # Decompression failed but we will continue with the remaining + # files anyway. Set exit status to at least 2 to indicate an error. + test "$r" -lt 2 && r=2 + fi + + # Since res=1 is the initial value, we only need to care about + # matches (r == 0) and errors (r >= 2) here; r == 1 can be ignored. + if test "$r" -ge 2; then + # An error occurred in decompressor, grep, or some other command. Update + # res unless a larger error code has been seen with an earlier file. + test "$res" -lt "$r" && res=$r + elif test "$r" -eq 0; then + # grep found a match and no errors occurred. Update res if no errors have + # occurred with earlier files. + test "$res" -eq 1 && res=0 + fi +done + +# 0: At least one file matched and no errors occurred. +# 1: No matches were found and no errors occurred. +# >=2: Error. It's unknown if matches were found. +exit "$res" diff --git a/bin/xzgrep b/bin/xzgrep new file mode 100644 index 0000000000000000000000000000000000000000..f2a73bb9297a231da8823435f9d8bde68787b63a --- /dev/null +++ b/bin/xzgrep @@ -0,0 +1,300 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# xzgrep -- a wrapper around a grep program that decompresses files as needed +# Adapted from a version sent by Charles Levert + +# Copyright (C) 1998, 2001, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='xz --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *egrep*) prog=xzegrep; grep=${GREP:-grep -E};; + *fgrep*) prog=xzfgrep; grep=${GREP:-grep -F};; + *) prog=xzgrep; grep=${GREP:-grep};; +esac + +version="$prog (XZ Utils) 5.6.4" + +usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]... +Look for instances of PATTERN in the input FILEs, using their +uncompressed contents if they are compressed. + +OPTIONs are the same as for '$grep'. + +Report bugs to ." + +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' +operands= +have_pat=0 +files_with_matches=0 +files_without_matches=0 +no_filename=0 +with_filename=0 + +# See if -H and --label options are supported (GNU and *BSDs). +if test f:x = "$(eval "echo x | $grep -H --label=f x 2> /dev/null")"; then + grep_supports_label=1 +else + grep_supports_label=0 +fi + +while test $# -ne 0; do + option=$1 + shift + optarg= + + case $option in + (-[0123456789abcdEFGhHiIKlLnoPqrRsTuUvVwxyzZ]*[!0123456789]*) + # Something like -Fiv was specified, that is, $option contains more + # than one option of which the first option (in this example -F) + # doesn't take an argument. 
Split the first option into a standalone + # argument and continue parsing the rest of the options (in this example, + # replace -Fiv with -iv in the argument list and set option=-F). + # + # If there are digits [0-9] they are treated as if they were a single + # option character because this syntax is an alias for -C for GNU grep. + # For example, "grep -25F" is equivalent to "grep -C25 -F". If only + # digits are specified like "grep -25" we don't get here because the + # above pattern in the case-statement doesn't match such strings. + arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' | + LC_ALL=C sed "$escape") + eval "set -- $arg2 "'${1+"$@"}' + option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');; + (--binary-*=* | --[lm]a*=* | --reg*=*) + # These options require an argument and an argument has been provided + # with the --foo=argument syntax. All is good. + ;; + (-[ABCDefmX] | --binary-* | --file | --[lm]a* | --reg*) + # These options require an argument which should now be in $1. + # If it isn't, display an error and exit. 
+ case ${1?"$option option requires an argument"} in + (*\'*) + optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + optarg=" '$1'";; + esac + shift;; + (--) + break;; + (-?*) + ;; + (*) + case $option in + (*\'*) + operands="$operands '"$(printf '%sX\n' "$option" | + LC_ALL=C sed "$escape");; + (*) + operands="$operands '$option'";; + esac + ${POSIXLY_CORRECT+break} + continue;; + esac + + case $option in + (-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*) + printf >&2 '%s: %s: Option not supported\n' "$0" "$option" + exit 2;; + (-[ef]* | --file | --file=* | --reg*) + have_pat=1;; + (--h | --he | --hel | --help) + printf '%s\n' "$usage" || exit 2 + exit;; + (-H | --wi | --wit | --with | --with- | --with-f | --with-fi \ + | --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \ + | --with-filename) + with_filename=1 + continue;; + (-l | --files-with-*) + files_with_matches=1 + continue;; + (-L | --files-witho*) + files_without_matches=1 + continue;; + (-h | --no-f*) + no_filename=1;; + (-V | --v | --ve | --ver | --vers | --versi | --versio | --version) + printf '%s\n' "$version" || exit 2 + exit;; + esac + + case $option in + (*\'?*) + option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");; + (*) + option="'$option'";; + esac + + grep="$grep $option$optarg" +done + +eval "set -- $operands "'${1+"$@"}' + +if test $have_pat -eq 0; then + case ${1?"Missing pattern; try '${0##*/} --help' for help"} in + (*\'*) + grep="$grep -e '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + grep="$grep -e '$1'";; + esac + shift +fi + +if test $# -eq 0; then + set -- - +fi + +exec 3>&1 + +# res=1 means that no file matched yet +res=1 + +for i; do + case $i in + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) uncompress="gzip -cdf";; + *[-.]bz2 | *[-.]tbz | *.tbz2) uncompress="bzip2 -cdf";; + *[-.]lzo | *[-.]tzo) uncompress="lzop -cdf";; + *[-.]zst | *[-.]tzst) uncompress="zstd -cdfq";; # zstd needs -q. 
+ *[-.]lz4) uncompress="lz4 -cdf";; + *) uncompress="$xz -cdfqQ";; # -qQ to ignore warnings like unsupp. check. + esac + # xz_status will hold the decompressor's exit status. + # Exit status of grep (and in rare cases, printf or sed) is + # available as the exit status of this assignment command. + xz_status=$( + exec 5>&1 + ($uncompress -- "$i" 5>&-; echo $? >&5) 3>&- | + if test $files_with_matches -eq 1; then + eval "$grep -q" && { printf '%s\n' "$i" || exit 2; } + elif test $files_without_matches -eq 1; then + eval "$grep -q" || { + r=$? + if test $r -eq 1; then + printf '%s\n' "$i" || r=2 + fi + exit $r + } + elif test $with_filename -eq 0 && + { test $# -eq 1 || test $no_filename -eq 1; }; then + eval "$grep" + elif test $grep_supports_label -eq 1; then + # The grep implementation in use allows us to specify the filename + # that grep will prefix to the output lines. This is faster and + # less prone to security bugs than the fallback method that uses sed. + # This also avoids confusing output with GNU grep >= 3.5 (2020-09-27) + # which prints "binary file matches" to stderr instead of stdout. + # + # If reading from stdin, let grep use whatever name it prefers for + # stdin. With GNU grep it is a locale-specific translated string. + if test "x$i" = "x-"; then + eval "$grep -H" + else + eval "$grep -H --label \"\$i\"" + fi + else + # Append a colon so that the last character will never be a newline + # which would otherwise get lost in shell command substitution. + i="$i:" + + # Escape & \ | and newlines only if such characters are present + # (speed optimization). + case $i in + (*' +'* | *'&'* | *'\'* | *'|'*) + # If sed fails, set i to a known safe string to ensure that + # failing sed did not create a half-escaped dangerous string. + i=$(printf '%s\n' "$i" | LC_ALL=C sed 's/[&\|]/\\&/g; $!s/$/\\/') || + i='(unknown filename):';; + esac + + # $i already ends with a colon so do not add it here. 
+ sed_script="s|^|$i|" + + # If grep or sed fails, pick the larger value of the two exit statuses. + # If sed fails, use at least 2 since we use >= 2 to indicate errors. + r=$( + exec 4>&1 + (eval "$grep" 4>&-; echo $? >&4) 3>&- | + LC_ALL=C sed "$sed_script" >&3 4>&- + ) || { + sed_status=$? + test "$sed_status" -lt 2 && sed_status=2 + test "$r" -lt "$sed_status" && r=$sed_status + } + exit $r + fi >&3 5>&- + ) + r=$? + + # If grep or sed or other non-decompression command failed with a signal, + # exit immediately and ignore the possible remaining files. + # + # NOTE: Instead of 128 + signal_number, some shells use + # 256 + signal_number (ksh) or 384 + signal_number (yash). + # This is fine for us since their "exit" and "kill -l" commands take + # this into account. (At least the versions I tried do but there is + # a report of an old ksh variant whose "exit" truncates the exit status + # to 8 bits without any special handling for values indicating a signal.) + test "$r" -ge 128 && exit "$r" + + if test -z "$xz_status"; then + # Something unusual happened, for example, we got a signal and + # the exit status of the decompressor was never echoed and thus + # $xz_status is empty. Exit immediately and ignore the possible + # remaining files. + exit 2 + elif test "$xz_status" -ge 128; then + # The decompressor died due to a signal. SIGPIPE is ignored since it can + # occur if grep exits before the whole file has been decompressed (grep -q + # can do that). If the decompressor died with some other signal, exit + # immediately and ignore the possible remaining files. + test "$(kill -l "$xz_status" 2> /dev/null)" != "PIPE" && exit "$xz_status" + elif test "$xz_status" -gt 0; then + # Decompression failed but we will continue with the remaining + # files anyway. Set exit status to at least 2 to indicate an error. 
+ test "$r" -lt 2 && r=2 + fi + + # Since res=1 is the initial value, we only need to care about + # matches (r == 0) and errors (r >= 2) here; r == 1 can be ignored. + if test "$r" -ge 2; then + # An error occurred in decompressor, grep, or some other command. Update + # res unless a larger error code has been seen with an earlier file. + test "$res" -lt "$r" && res=$r + elif test "$r" -eq 0; then + # grep found a match and no errors occurred. Update res if no errors have + # occurred with earlier files. + test "$res" -eq 1 && res=0 + fi +done + +# 0: At least one file matched and no errors occurred. +# 1: No matches were found and no errors occurred. +# >=2: Error. It's unknown if matches were found. +exit "$res" diff --git a/bin/xzless b/bin/xzless new file mode 100644 index 0000000000000000000000000000000000000000..ac10bbc4be5eb19a553660fddc3d04a845527767 --- /dev/null +++ b/bin/xzless @@ -0,0 +1,74 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 1998, 2002, 2006, 2007 Free Software Foundation + +# The original version for gzip was written by Paul Eggert. +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. 
+xz='xz --format=auto' + +version='xzless (XZ Utils) 5.6.4' + +usage="Usage: ${0##*/} [OPTION]... [FILE]... +Like 'less', but operate on the uncompressed contents of xz compressed FILEs. + +Options are the same as for 'less'. + +Report bugs to ." + +case $1 in + --help) printf '%s\n' "$usage" || exit 2; exit;; + --version) printf '%s\n' "$version" || exit 2; exit;; +esac + +if test "${LESSMETACHARS+set}" != set; then + # Work around a bug in less 394 and earlier; + # it mishandles the metacharacters '$%=~'. + space=' ' + tab=' ' + nl=' +' + LESSMETACHARS="$space$tab$nl'"';*?"()<>[|&^`#\$%=~' +fi + +VER=$(less -V | { read _ ver _ && echo ${ver%%.*}; }) +if test "$VER" -ge 451; then + # less 451 or later: If the compressed file is valid but has + # zero bytes of uncompressed data, using two vertical bars ||- makes + # "less" check the exit status of xz and if it is zero then display + # an empty file. With a single vertical bar |- and no output from xz, + # "less" would attempt to display the raw input file instead. + LESSOPEN="||-$xz -cdfqQ -- %s" +elif test "$VER" -ge 429; then + # less 429 or later: LESSOPEN pipe will be used on + # standard input if $LESSOPEN begins with |-. + LESSOPEN="|-$xz -cdfqQ -- %s" +else + LESSOPEN="|$xz -cdfqQ -- %s" +fi + +SHOW_PREPROC_ERRORS= +if test "$VER" -ge 632; then + SHOW_PREPROC_ERRORS=--show-preproc-errors +fi + +export LESSMETACHARS LESSOPEN + +exec less $SHOW_PREPROC_ERRORS "$@" diff --git a/bin/xzmore b/bin/xzmore new file mode 100644 index 0000000000000000000000000000000000000000..1a7078e38564268f323b19058a46f32f14aad11e --- /dev/null +++ b/bin/xzmore @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 2001, 2002, 2007 Free Software Foundation +# Copyright (C) 1992, 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. 
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. +xz='xz --format=auto' + +version='xzmore (XZ Utils) 5.6.4' + +usage="Usage: ${0##*/} [OPTION]... [FILE]... +Like 'more', but operate on the uncompressed contents of xz compressed FILEs. + +Report bugs to ." + +case $1 in + --help) printf '%s\n' "$usage" || exit 2; exit;; + --version) printf '%s\n' "$version" || exit 2; exit;; +esac + +oldtty=`stty -g 2>/dev/null` +if stty -cbreak 2>/dev/null; then + cb='cbreak'; ncb='-cbreak' +else + # 'stty min 1' resets eof to ^a on both SunOS and SysV! + cb='min 1 -icanon'; ncb='icanon eof ^d' +fi +if test $? 
-eq 0 && test -n "$oldtty"; then + trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 +else + trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 +fi + +if test $# = 0; then + if test -t 0; then + printf '%s\n' "$usage"; exit 1 + else + $xz -cdfqQ | eval "${PAGER:-more}" + fi +else + FIRST=1 + for FILE; do + < "$FILE" || continue + if test $FIRST -eq 0; then + printf "%s--More--(Next file: %s)" "" "$FILE" + stty $cb -echo 2>/dev/null + ANS=`dd bs=1 count=1 2>/dev/null` + stty $ncb echo 2>/dev/null + echo " " + case "$ANS" in + [eq]) exit;; + esac + fi + if test "$ANS" != 's'; then + printf '%s\n' "------> $FILE <------" + $xz -cdfqQ -- "$FILE" | eval "${PAGER:-more}" + fi + if test -t 1; then + FIRST=0 + fi + done +fi diff --git a/bin/zstdgrep b/bin/zstdgrep new file mode 100644 index 0000000000000000000000000000000000000000..61efaa9474a0058699a0780c769c4b49e487b3e1 --- /dev/null +++ b/bin/zstdgrep @@ -0,0 +1,134 @@ +#!/bin/sh +# +# Copyright (c) 2003 Thomas Klausner. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +grep=${GREP:-grep} +zcat=${ZCAT:-zstdcat} + +endofopts=0 +pattern_found=0 +grep_args="" +hyphen=0 +silent=0 + +prog=${0##*/} + +# handle being called 'zegrep' or 'zfgrep' +case $prog in + *egrep*) prog=zegrep; grep_args='-E';; + *fgrep*) prog=zfgrep; grep_args='-F';; + *) prog=zstdgrep;; +esac + +# skip all options and pass them on to grep taking care of options +# with arguments, and if -e was supplied + +while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do + case "$1" in + # from GNU grep-2.5.1 -- keep in sync! + -[ABCDXdefm]) + if [ "$#" -lt 2 ]; then + printf '%s: missing argument for %s flag\n' "${prog}" "$1" >&2 + exit 1 + fi + case "$1" in + -e) + pattern="$2" + pattern_found=1 + shift 2 + break + ;; + -f) + pattern_found=2 + ;; + *) + ;; + esac + grep_args="${grep_args} $1 $2" + shift 2 + ;; + --) + shift + endofopts=1 + ;; + -) + hyphen=1 + shift + ;; + -h) + silent=1 + shift + ;; + -*) + grep_args="${grep_args} $1" + shift + ;; + *) + # pattern to grep for + endofopts=1 + ;; + esac +done + +# if no -e option was found, take next argument as grep-pattern +if [ "${pattern_found}" -lt 1 ]; then + if [ "$#" -ge 1 ]; then + pattern="$1" + shift + elif [ "${hyphen}" -gt 0 ]; then + pattern="-" + else + printf '%s: missing pattern\n' "${prog}" >&2 + exit 1 + fi +fi + +EXIT_CODE=0 +# call grep ... +if [ "$#" -lt 1 ]; then + # ... on stdin + set -f # Disable file name generation (globbing). 
+ # shellcheck disable=SC2086 + "${zcat}" - | "${grep}" ${grep_args} -- "${pattern}" - + EXIT_CODE=$? + set +f +else + # ... on all files given on the command line + if [ "${silent}" -lt 1 ] && [ "$#" -gt 1 ]; then + grep_args="-H ${grep_args}" + fi + set -f + while [ "$#" -gt 0 ]; do + # shellcheck disable=SC2086 + if [ $pattern_found -eq 2 ]; then + "${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- - + else + "${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" - + fi + [ "$?" -ne 0 ] && EXIT_CODE=1 + shift + done + set +f +fi + +exit "${EXIT_CODE}" diff --git a/bin/zstdless b/bin/zstdless new file mode 100644 index 0000000000000000000000000000000000000000..17726a4f6e1efb5328d0f2c116f20500149db41b --- /dev/null +++ b/bin/zstdless @@ -0,0 +1,8 @@ +#!/bin/sh + +zstd=${ZSTD:-zstd} + +# TODO: Address quirks and bugs tied to old versions of less, provide a mechanism to pass flags directly to zstd + +export LESSOPEN="|-${zstd} -cdfq %s" +exec less "$@" diff --git a/doc/Cupti/annotated.html b/doc/Cupti/annotated.html new file mode 100644 index 0000000000000000000000000000000000000000..e9bc055af4a168e770b8a27c1040432dc0ab3715 --- /dev/null +++ b/doc/Cupti/annotated.html @@ -0,0 +1,18062 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

6. Data Structures

+
+
+

Here are the data structures with brief descriptions:

+
+
BufferInfo
+
BufferInfo + will be stored in the file for every buffer, i.e. for every + call of UtilDumpPcSamplingBufferInFile() API +
+
CUPTI::​PcSamplingUtil::​CUptiUtil_GetBufferInfoParams
+
Params for + CuptiUtilGetBufferInfo +
+
CUPTI::​PcSamplingUtil::​CUptiUtil_GetHeaderDataParams
+
Params for + CuptiUtilGetHeaderData +
+
CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams
+
Params for + CuptiUtilGetPcSampData +
+
CUPTI::​PcSamplingUtil::​CUptiUtil_MergePcSampDataParams
+
Params for + CuptiUtilMergePcSampData +
+
CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams
+
Params for + CuptiUtilPutPcSampData +
+
CUpti_Activity
+
The base activity record
+
CUpti_ActivityAPI
+
The activity record for a driver + or runtime API invocation +
+
CUpti_ActivityAutoBoostState
+
Device auto boost state + structure +
+
CUpti_ActivityBranch
+
The activity record for source + level result branch. (deprecated) +
+
CUpti_ActivityBranch2
+
The activity record for source + level result branch +
+
CUpti_ActivityCdpKernel
+
The activity record for CDP (CUDA + Dynamic Parallelism) kernel +
+
CUpti_ActivityContext
+
The activity record for a + context +
+
CUpti_ActivityCudaEvent
+
The activity record for CUDA + event +
+
CUpti_ActivityDevice
+
The activity record for a device. + (deprecated) +
+
CUpti_ActivityDevice2
+
The activity record for a device. + (deprecated) +
+
CUpti_ActivityDevice3
+
The activity record for a device. + (CUDA 7.0 onwards) +
+
CUpti_ActivityDevice4
+
The activity record for a device. + (CUDA 11.6 onwards) +
+
CUpti_ActivityDeviceAttribute
+
The activity record for a device + attribute +
+
CUpti_ActivityEnvironment
+
The activity record for CUPTI + environmental data +
+
CUpti_ActivityEvent
+
The activity record for a CUPTI + event +
+
CUpti_ActivityEventInstance
+
The activity record for a CUPTI + event with instance information +
+
CUpti_ActivityExternalCorrelation
+
The activity record for + correlation with external records +
+
CUpti_ActivityFunction
+
The activity record for + global/device functions +
+
CUpti_ActivityGlobalAccess
+
The activity record for + source-level global access. (deprecated) +
+
CUpti_ActivityGlobalAccess2
+
The activity record for + source-level global access. (deprecated in CUDA 9.0) +
+
CUpti_ActivityGlobalAccess3
+
The activity record for + source-level global access +
+
CUpti_ActivityGraphTrace
+
The activity record for trace of + graph execution +
+
CUpti_ActivityInstantaneousEvent
+
The activity record for an + instantaneous CUPTI event +
+
CUpti_ActivityInstantaneousEventInstance
+
The activity record for an + instantaneous CUPTI event with event domain instance + information +
+
CUpti_ActivityInstantaneousMetric
+
The activity record for an + instantaneous CUPTI metric +
+
CUpti_ActivityInstantaneousMetricInstance
+
The instantaneous activity record + for a CUPTI metric with instance information +
+
CUpti_ActivityInstructionCorrelation
+
The activity record for + source-level sass/source line-by-line correlation +
+
CUpti_ActivityInstructionExecution
+
The activity record for + source-level instruction execution +
+
CUpti_ActivityJit
+
The activity record for JIT + operations. This activity represents the JIT operations + (compile, load, store) of a CUmodule from the Compute + Cache. Gives the exact hashed path of where the cached + module is loaded from, or where the module will be stored + after Just-In-Time (JIT) compilation +
+
CUpti_ActivityKernel
+
The activity record for kernel. + (deprecated) +
+
CUpti_ActivityKernel2
+
The activity record for kernel. + (deprecated) +
+
CUpti_ActivityKernel3
+
The activity record for a kernel + (CUDA 6.5(with sm_52 support) onwards). (deprecated in + CUDA 9.0) +
+
CUpti_ActivityKernel4
+
The activity record for a kernel + (CUDA 9.0(with sm_70 support) onwards). (deprecated in + CUDA 11.0) +
+
CUpti_ActivityKernel5
+
The activity record for a kernel + (CUDA 11.0(with sm_80 support) onwards). (deprecated in + CUDA 11.2) This activity record represents a kernel + execution (CUPTI_ACTIVITY_KIND_KERNEL and + CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer + generated by CUPTI. Kernel activities are now reported + using the CUpti_ActivityKernel8 + activity record +
+
CUpti_ActivityKernel6
+
The activity record for kernel. + (deprecated in CUDA 11.6) +
+
CUpti_ActivityKernel7
+
The activity record for kernel. + (deprecated in CUDA 11.8) +
+
CUpti_ActivityKernel8
+
The activity record for + kernel +
+
CUpti_ActivityMarker
+
The activity record providing a + marker which is an instantaneous point in time. + (deprecated in CUDA 8.0) +
+
CUpti_ActivityMarker2
+
The activity record providing a + marker which is an instantaneous point in time +
+
CUpti_ActivityMarkerData
+
The activity record providing + detailed information for a marker +
+
CUpti_ActivityMemcpy
+
The activity record for memory + copies. (deprecated) +
+
CUpti_ActivityMemcpy3
+
The activity record for memory + copies. (deprecated in CUDA 11.1) +
+
CUpti_ActivityMemcpy4
+
The activity record for memory + copies. (deprecated in CUDA 11.6) +
+
CUpti_ActivityMemcpy5
+
The activity record for memory + copies +
+
CUpti_ActivityMemcpyPtoP
+
The activity record for + peer-to-peer memory copies +
+
CUpti_ActivityMemcpyPtoP2
+
The activity record for + peer-to-peer memory copies. (deprecated in CUDA + 11.1) +
+
CUpti_ActivityMemcpyPtoP3
+
The activity record for + peer-to-peer memory copies. (deprecated in CUDA + 11.6) +
+
CUpti_ActivityMemcpyPtoP4
+
The activity record for + peer-to-peer memory copies +
+
CUpti_ActivityMemory
+
The activity record for + memory +
+
CUpti_ActivityMemory2
+
The activity record for + memory +
+
CUpti_ActivityMemory3
+
The activity record for + memory +
+
CUpti_ActivityMemory3::​CUpti_ActivityMemory3::​PACKED_ALIGNMENT
+
+
CUpti_ActivityMemoryPool
+
The activity record for memory + pool +
+
CUpti_ActivityMemoryPool2
+
The activity record for memory + pool +
+
CUpti_ActivityMemset
+
The activity record for memset. + (deprecated) +
+
CUpti_ActivityMemset2
+
The activity record for memset. + (deprecated in CUDA 11.1) +
+
CUpti_ActivityMemset3
+
The activity record for memset. + (deprecated in CUDA 11.6) +
+
CUpti_ActivityMemset4
+
The activity record for + memset +
+
CUpti_ActivityMetric
+
The activity record for a CUPTI + metric +
+
CUpti_ActivityMetricInstance
+
The activity record for a CUPTI + metric with instance information +
+
CUpti_ActivityModule
+
The activity record for a CUDA + module +
+
CUpti_ActivityName
+
The activity record providing a + name +
+
CUpti_ActivityNvLink
+
NVLink information. (deprecated in + CUDA 9.0) +
+
CUpti_ActivityNvLink2
+
NVLink information. (deprecated in + CUDA 10.0) +
+
CUpti_ActivityNvLink3
+
NVLink information
+
CUpti_ActivityNvLink4
+
NVLink information
+
CUpti_ActivityObjectKindId
+
Identifiers for object kinds as + specified by CUpti_ActivityObjectKind +
+
CUpti_ActivityOpenAcc
+
The base activity record for + OpenAcc records +
+
CUpti_ActivityOpenAccData
+
The activity record for OpenACC + data +
+
CUpti_ActivityOpenAccLaunch
+
The activity record for OpenACC + launch +
+
CUpti_ActivityOpenAccOther
+
The activity record for OpenACC + other +
+
CUpti_ActivityOpenMp
+
The base activity record for + OpenMp records +
+
CUpti_ActivityOverhead
+
The activity record for CUPTI and + driver overheads +
+
CUpti_ActivityPcie
+
PCI devices information required + to construct topology +
+
CUpti_ActivityPCSampling
+
The activity record for PC + sampling. (deprecated in CUDA 8.0) +
+
CUpti_ActivityPCSampling2
+
The activity record for PC + sampling. (deprecated in CUDA 9.0) +
+
CUpti_ActivityPCSampling3
+
The activity record for PC + sampling +
+
CUpti_ActivityPCSamplingConfig
+
PC sampling configuration + structure +
+
CUpti_ActivityPCSamplingRecordInfo
+
The activity record for record + status for PC sampling +
+
CUpti_ActivityPreemption
+
The activity record for a + preemption of a CDP kernel +
+
CUpti_ActivitySharedAccess
+
The activity record for + source-level shared access +
+
CUpti_ActivitySourceLocator
+
The activity record for source + locator +
+
CUpti_ActivityStream
+
The activity record for CUDA + stream +
+
CUpti_ActivitySynchronization
+
The activity record for + synchronization management +
+
CUpti_ActivityUnifiedMemoryCounter
+
The activity record for Unified + Memory counters (deprecated in CUDA 7.0) +
+
CUpti_ActivityUnifiedMemoryCounter2
+
The activity record for Unified + Memory counters (CUDA 7.0 and beyond) +
+
CUpti_ActivityUnifiedMemoryCounterConfig
+
Unified Memory counters + configuration structure +
+
CUpti_CallbackData
+
Data passed into a runtime or + driver API callback function +
+
CUpti_EventGroupSet
+
A set of event groups
+
CUpti_EventGroupSets
+
A set of event group sets
+
CUpti_GetCubinCrcParams
+
Params for cuptiGetCubinCrc
+
CUpti_GetSassToSourceCorrelationParams
+
Params for + cuptiGetSassToSourceCorrelation +
+
CUpti_GraphData
+
CUDA graphs data passed into a + resource callback function +
+
CUpti_MetricValue
+
A metric value
+
CUpti_ModuleResourceData
+
Module data passed into a resource + callback function +
+
CUpti_NvtxData
+
Data passed into a NVTX callback + function +
+
CUpti_PCSamplingConfigurationInfo
+
PC sampling configuration + information structure +
+
CUpti_PCSamplingConfigurationInfoParams
+
PC sampling configuration + structure +
+
CUpti_PCSamplingData
+
Collected PC Sampling data
+
CUpti_PCSamplingDisableParams
+
Params for + cuptiPCSamplingDisable +
+
CUpti_PCSamplingEnableParams
+
Params for + cuptiPCSamplingEnable +
+
CUpti_PCSamplingGetDataParams
+
Params for + cuptiPCSamplingGetData +
+
CUpti_PCSamplingGetNumStallReasonsParams
+
Params for + cuptiPCSamplingGetNumStallReasons +
+
CUpti_PCSamplingGetStallReasonsParams
+
Params for + cuptiPCSamplingGetStallReasons +
+
CUpti_PCSamplingPCData
+
PC Sampling data
+
CUpti_PCSamplingStallReason
+
PC Sampling stall reasons
+
CUpti_PCSamplingStartParams
+
Params for + cuptiPCSamplingStart +
+
CUpti_PCSamplingStopParams
+
Params for + cuptiPCSamplingStop +
+
CUpti_Profiler_BeginPass_Params
+
Params for + cuptiProfilerBeginPass +
+
CUpti_Profiler_BeginSession_Params
+
Params for + cuptiProfilerBeginSession +
+
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
+
Params for + cuptiProfilerCounterDataImageCalculateScratchBufferSize +
+
CUpti_Profiler_CounterDataImage_CalculateSize_Params
+
Params for + cuptiProfilerCounterDataImageCalculateSize +
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
Params for + cuptiProfilerCounterDataImageInitialize +
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
Params for + cuptiProfilerCounterDataImageInitializeScratchBuffer +
+
CUpti_Profiler_CounterDataImageOptions
+
Input parameter to define the + counterDataImage +
+
CUpti_Profiler_DeInitialize_Params
+
Default parameter for + cuptiProfilerDeInitialize +
+
CUpti_Profiler_DeviceSupported_Params
+
Params for + cuptiProfilerDeviceSupported +
+
CUpti_Profiler_DisableProfiling_Params
+
Params for + cuptiProfilerDisableProfiling +
+
CUpti_Profiler_EnableProfiling_Params
+
Params for + cuptiProfilerEnableProfiling +
+
CUpti_Profiler_EndPass_Params
+
Params for + cuptiProfilerEndPass +
+
CUpti_Profiler_EndSession_Params
+
Params for + cuptiProfilerEndSession +
+
CUpti_Profiler_FlushCounterData_Params
+
Params for + cuptiProfilerFlushCounterData +
+
CUpti_Profiler_GetCounterAvailability_Params
+
Params for + cuptiProfilerGetCounterAvailability +
+
CUpti_Profiler_Initialize_Params
+
Default parameter for + cuptiProfilerInitialize +
+
CUpti_Profiler_IsPassCollected_Params
+
Params for + cuptiProfilerIsPassCollected +
+
CUpti_Profiler_SetConfig_Params
+
Params for + cuptiProfilerSetConfig +
+
CUpti_Profiler_UnsetConfig_Params
+
Params for + cuptiProfilerUnsetConfig +
+
CUpti_ResourceData
+
Data passed into a resource + callback function +
+
CUpti_SynchronizeData
+
Data passed into a synchronize + callback function +
+
Header
+
Header + info will be stored in file +
+
NV::​Cupti::​Checkpoint::​CUpti_Checkpoint
+
Configuration and handle for a + CUPTI Checkpoint +
+
PcSamplingStallReasons
+
All available stall reasons name + and respective indexes will be stored in it +
+
+
+
+
+

6.1. BufferInfo Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
uint64_t  bufferByteSize
+
+
uint64_t  numSelectedStallReasons
+
+
size_t  numStallReasons
+
+
uint64_t  recordCount
+
+
+
+

Variables

+
+
+ uint64_t BufferInfo::bufferByteSize [inherited]
+
+
+

Buffer size in Bytes.

+
+
+
+ uint64_t BufferInfo::numSelectedStallReasons [inherited]
+
+
+

Total number of stall reasons in single record.

+
+
+
+ size_t BufferInfo::numStallReasons [inherited]
+
+
+

Count of all stall reasons supported on the GPU

+
+
+
+ uint64_t BufferInfo::recordCount [inherited]
+
+
+

Total number of PC records.

+
+
+
+
+
+
+

6.2. CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
struct BufferInfo bufferInfoData
+
+
+ std::ifstream + * fileHandler
+
+
size_t  size
+
+
+
+

Variables

+
+
+ struct BufferInfoCUPTI::​PcSamplingUtil::​CUptiUtil_GetBufferInfoParams::bufferInfoData [inherited]
+
+
+

Buffer Info.

+
+
+
+ + + std::ifstream + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetBufferInfoParams::fileHandler [inherited]
+
+
+

File handle.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_GetBufferInfoParams::size [inherited]
+
+
+

Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will + be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.3. CUPTI::PcSamplingUtil::CUptiUtil_GetHeaderDataParams Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
+ std::ifstream + * fileHandler
+
+
struct Header headerInfo
+
+
size_t  size
+
+
+
+

Variables

+
+
+ + + std::ifstream + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetHeaderDataParams::fileHandler [inherited]
+
+
+

File handle.

+
+
+
+ struct HeaderCUPTI::​PcSamplingUtil::​CUptiUtil_GetHeaderDataParams::headerInfo [inherited]
+
+
+

Header Info. +

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_GetHeaderDataParams::size [inherited]
+
+
+

Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will + be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.4. CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
PcSamplingBufferType bufferType
+
+
+ std::ifstream + * fileHandler
+
+
size_t  numAttributes
+
+
BufferInfo + * pBufferInfoData
+
+
CUpti_PCSamplingConfigurationInfo + * pPCSamplingConfigurationInfo
+
+
PcSamplingStallReasons + * pPcSamplingStallReasons
+
+
+ void + * pSamplingData
+
+
size_t  size
+
+
+
+

Variables

+
+
+ PcSamplingBufferTypeCUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::bufferType [inherited]
+
+
+

Type of buffer to store in file

+
+
+
+ + + std::ifstream + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::fileHandler [inherited]
+
+
+

File handle.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::numAttributes [inherited]
+
+
+

Number of configuration attributes

+
+
+
+ BufferInfo + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::pBufferInfoData [inherited]
+
+
+

Pointer to collected buffer info using CuptiUtilGetBufferInfo

+
+
+
+ CUpti_PCSamplingConfigurationInfo + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::pPCSamplingConfigurationInfo [inherited]
+
+ +
+
+ PcSamplingStallReasons + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::pPcSamplingStallReasons [inherited]
+
+
+

Refer to PcSamplingStallReasons. For the stallReasons field of PcSamplingStallReasons, memory is expected to be allocated for each string element of the array. +

+
+
+
+ + + void + * CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::pSamplingData [inherited]
+
+
+

Pointer to allocated memory to store retrieved data from file.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_GetPcSampDataParams::size [inherited]
+
+
+

Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will + be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.5. CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
+ + + * MergedPcSampDataBuffers
+
+
CUpti_PCSamplingData + * PcSampDataBuffer
+
+
+ size_t + * numMergedBuffer
+
+
size_t  numberOfBuffers
+
+
size_t  size
+
+
+
+

Variables

+
+
+ + + + + * CUPTI::​PcSamplingUtil::​CUptiUtil_MergePcSampDataParams::MergedPcSampDataBuffers [inherited]
+
+
+

Pointer to array of merged buffers as per the range id.

+
+
+
+ CUpti_PCSamplingData + * CUPTI::​PcSamplingUtil::​CUptiUtil_MergePcSampDataParams::PcSampDataBuffer [inherited]
+
+
+

Pointer to array of buffers to merge

+
+
+
+ + + size_t + * CUPTI::​PcSamplingUtil::​CUptiUtil_MergePcSampDataParams::numMergedBuffer [inherited]
+
+
+

Number of merged buffers.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_MergePcSampDataParams::numberOfBuffers [inherited]
+
+
+

Number of buffers to merge.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_MergePcSampDataParams::size [inherited]
+
+
+

Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will + be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.6. CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
PcSamplingBufferType bufferType
+
+
const + char + * fileName
+
+
size_t  numAttributes
+
+
CUpti_PCSamplingConfigurationInfo + * pPCSamplingConfigurationInfo
+
+
PcSamplingStallReasons + * pPcSamplingStallReasons
+
+
+ void + * pSamplingData
+
+
size_t  size
+
+
+
+

Variables

+
+
+ PcSamplingBufferTypeCUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::bufferType [inherited]
+
+
+

Type of buffer to store in file

+
+
+
+ const + + char + * CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::fileName [inherited]
+
+
+

File name to store buffer into it.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::numAttributes [inherited]
+
+
+

Number of configured attributes

+
+
+
+ CUpti_PCSamplingConfigurationInfo + * CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::pPCSamplingConfigurationInfo [inherited]
+
+
+

Refer to CUpti_PCSamplingConfigurationInfo. It is expected to provide configuration details of at least the CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute. + +

+
+
+
+ PcSamplingStallReasons + * CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::pPcSamplingStallReasons [inherited]
+
+
+

Refer PcSamplingStallReasons. +

+
+
+
+ + + void + * CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::pSamplingData [inherited]
+
+
+

PC sampling buffer.

+
+
+
+ size_t CUPTI::​PcSamplingUtil::​CUptiUtil_PutPcSampDataParams::size [inherited]
+
+
+

Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will + be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.7. CUpti_Activity Struct Reference

+

[CUPTI Activity API] +

+
+

The activity API uses a CUpti_Activity as a generic representation for any activity. The 'kind' field is used to determine the specific activity kind, and from + that the CUpti_Activity object can be cast to the specific activity record type appropriate for that kind. +

+

Note that all activity record types are padded and aligned to ensure that each member of the record is naturally aligned.

+

See also:

+

CUpti_ActivityKind

+

+
+

Public Variables

+
+
CUpti_ActivityKind kind
+
+
+
+

Variables

+
+
+ CUpti_ActivityKindCUpti_Activity::kind [inherited]
+
+
+

The kind of this activity.

+
+
+
+
+
+
+

6.8. CUpti_ActivityAPI Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents an invocation of a driver or runtime API (CUPTI_ACTIVITY_KIND_DRIVER and CUPTI_ACTIVITY_KIND_RUNTIME). + +

+
+

Public Variables

+
+
CUpti_CallbackId cbid
+
+
uint32_t  correlationId
+
+
uint64_t  end
+
+
CUpti_ActivityKind kind
+
+
uint32_t  processId
+
+
uint32_t  returnValue
+
+
uint64_t  start
+
+
uint32_t  threadId
+
+
+
+

Variables

+
+
+ CUpti_CallbackIdCUpti_ActivityAPI::cbid [inherited]
+
+
+

The ID of the driver or runtime function.

+
+
+
+ uint32_t CUpti_ActivityAPI::correlationId [inherited]
+
+
+

The correlation ID of the driver or runtime CUDA function. Each function invocation is assigned a unique correlation ID that + is identical to the correlation ID in the memcpy, memset, or kernel activity record that is associated with this function. + +

+
+
+
+ uint64_t CUpti_ActivityAPI::end [inherited]
+
+
+

The end timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information + could not be collected for the function. +

+
+
+
+ CUpti_ActivityKindCUpti_ActivityAPI::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_DRIVER, CUPTI_ACTIVITY_KIND_RUNTIME, or CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API. + +

+
+
+
+ uint32_t CUpti_ActivityAPI::processId [inherited]
+
+
+

The ID of the process where the driver or runtime CUDA function is executing.

+
+
+
+ uint32_t CUpti_ActivityAPI::returnValue [inherited]
+
+
+

The return value for the function. For a CUDA driver function this will be a CUresult value, and for a CUDA runtime function + this will be a cudaError_t value. +

+
+
+
+ uint64_t CUpti_ActivityAPI::start [inherited]
+
+
+

The start timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the function. +

+
+
+
+ uint32_t CUpti_ActivityAPI::threadId [inherited]
+
+
+

The ID of the thread where the driver or runtime CUDA function is executing.

+
+
+
+
+
+
+

6.9. CUpti_ActivityAutoBoostState Struct Reference

+

[CUPTI Activity API] +

+
+

This structure defines auto boost state for a device. See function cuptiGetAutoBoostState

+
+

Public Variables

+
+
uint32_t  enabled
+
+
uint32_t  pid
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityAutoBoostState::enabled [inherited]
+
+
+

Returned auto boost state. 1 is returned in case auto boost is enabled, 0 otherwise

+
+
+
+ uint32_t CUpti_ActivityAutoBoostState::pid [inherited]
+
+
+

Id of process that has set the current boost state. The value will be CUPTI_AUTO_BOOST_INVALID_CLIENT_PID if the user does + not have the permission to query process ids or there is an error in querying the process id. +

+
+
+
+
+
+
+

6.10. CUpti_ActivityBranch Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records the locations of the branches in the source (CUPTI_ACTIVITY_KIND_BRANCH). Branch activities are now reported + using the CUpti_ActivityBranch2 activity record. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  diverged
+
+
uint32_t  executed
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityBranch::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityBranch::diverged [inherited]
+
+
+

Number of times this branch diverged

+
+
+
+ uint32_t CUpti_ActivityBranch::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It will be incremented regardless of predicate or condition code. + +

+
+
+
+ CUpti_ActivityKindCUpti_ActivityBranch::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_BRANCH.

+
+
+
+ uint32_t CUpti_ActivityBranch::pcOffset [inherited]
+
+
+

The pc offset for the branch.

+
+
+
+ uint32_t CUpti_ActivityBranch::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivityBranch::threadsExecuted [inherited]
+
+
+

Incremented each time this instruction is executed, by the number of threads that executed the instruction.

+
+
+
+
+
+
+

6.11. CUpti_ActivityBranch2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records the locations of the branches in the source (CUPTI_ACTIVITY_KIND_BRANCH).

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  diverged
+
+
uint32_t  executed
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityBranch2::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityBranch2::diverged [inherited]
+
+
+

Number of times this branch diverged

+
+
+
+ uint32_t CUpti_ActivityBranch2::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It will be incremented regardless of predicate or condition code. + +

+
+
+
+ uint32_t CUpti_ActivityBranch2::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivityBranch2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_BRANCH.

+
+
+
+ uint32_t CUpti_ActivityBranch2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityBranch2::pcOffset [inherited]
+
+
+

The pc offset for the branch.

+
+
+
+ uint32_t CUpti_ActivityBranch2::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivityBranch2::threadsExecuted [inherited]
+
+
+

Incremented each time this instruction is executed, by the number of threads that executed the instruction.

+
+
+
+
+
+
+

6.12. CUpti_ActivityCdpKernel Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CDP kernel execution.

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
CUpti_ActivityKind kind
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
uint32_t  parentBlockX
+
+
uint32_t  parentBlockY
+
+
uint32_t  parentBlockZ
+
+
int64_t  parentGridId
+
+
uint64_t  queued
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
uint8_t  sharedMemoryConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
uint64_t  submitted
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityCdpKernel::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityCdpKernel::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityCdpKernel::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ uint64_t CUpti_ActivityCdpKernel::completed [inherited]
+
+
+

The timestamp when kernel is marked as completed, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion + time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityCdpKernel::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityCdpKernel::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityCdpKernel::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ int64_t CUpti_ActivityCdpKernel::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel execution is assigned a unique grid ID.

+
+
+
+ int32_t CUpti_ActivityCdpKernel::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityCdpKernel::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityCdpKernel::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityCdpKernel::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_CDP_KERNEL

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityCdpKernel::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::parentBlockX [inherited]
+
+
+

The X-dimension of the parent block.

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::parentBlockY [inherited]
+
+
+

The Y-dimension of the parent block.

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::parentBlockZ [inherited]
+
+
+

The Z-dimension of the parent block.

+
+
+
+ int64_t CUpti_ActivityCdpKernel::parentGridId [inherited]
+
+
+

The grid ID of the parent kernel.

+
+
+
+ uint64_t CUpti_ActivityCdpKernel::queued [inherited]
+
+
+

The timestamp when kernel is queued up, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time is unknown. + +

+
+
+
+ uint16_t CUpti_ActivityCdpKernel::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityCdpKernel::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint8_t CUpti_ActivityCdpKernel::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint64_t CUpti_ActivityCdpKernel::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityCdpKernel::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityCdpKernel::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+ uint64_t CUpti_ActivityCdpKernel::submitted [inherited]
+
+
+

The timestamp when kernel is submitted to the gpu, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submission + time is unknown. +

+
+
+
+
+
+
+

6.13. CUpti_ActivityContext Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents information about a context (CUPTI_ACTIVITY_KIND_CONTEXT).

+
+

Public Variables

+
+
uint16_t  computeApiKind
+
+
uint32_t  contextId
+
+
uint32_t  deviceId
+
+
CUpti_ActivityKind kind
+
+
uint16_t  nullStreamId
+
+
+
+

Variables

+
+
+ uint16_t CUpti_ActivityContext::computeApiKind [inherited]
+
+
+

The compute API kind.

+

See also:

+

CUpti_ActivityComputeApiKind

+

+
+
+
+ uint32_t CUpti_ActivityContext::contextId [inherited]
+
+
+

The context ID.

+
+
+
+ uint32_t CUpti_ActivityContext::deviceId [inherited]
+
+
+

The device ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityContext::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_CONTEXT.

+
+
+
+ uint16_t CUpti_ActivityContext::nullStreamId [inherited]
+
+
+

The ID for the NULL stream in this context

+
+
+
+
+
+
+

6.14. CUpti_ActivityCudaEvent Struct Reference

+

[CUPTI Activity API] +

+
+

This activity is used to track recorded events (CUPTI_ACTIVITY_KIND_CUDA_EVENT).

+
+

Public Variables

+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  eventId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityCudaEvent::contextId [inherited]
+
+
+

The ID of the context where the event was recorded.

+
+
+
+ uint32_t CUpti_ActivityCudaEvent::correlationId [inherited]
+
+
+

The correlation ID of the API to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityCudaEvent::eventId [inherited]
+
+
+

A unique event ID to identify the event record.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityCudaEvent::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_CUDA_EVENT.

+
+
+
+ uint32_t CUpti_ActivityCudaEvent::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityCudaEvent::streamId [inherited]
+
+
+

The compute stream where the event was recorded.

+
+
+
+
+
+
+

6.15. CUpti_ActivityDevice Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE). Device activity is now reported + using the CUpti_ActivityDevice4 activity record. +

+
+

Public Variables

+
+
uint32_t  computeCapabilityMajor
+
+
uint32_t  computeCapabilityMinor
+
+
uint32_t  constantMemorySize
+
+
uint32_t  coreClockRate
+
+
CUpti_ActivityFlag flags
+
+
uint64_t  globalMemoryBandwidth
+
+
uint64_t  globalMemorySize
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
uint32_t  l2CacheSize
+
+
uint32_t  maxBlockDimX
+
+
uint32_t  maxBlockDimY
+
+
uint32_t  maxBlockDimZ
+
+
uint32_t  maxBlocksPerMultiprocessor
+
+
uint32_t  maxGridDimX
+
+
uint32_t  maxGridDimY
+
+
uint32_t  maxGridDimZ
+
+
uint32_t  maxIPC
+
+
uint32_t  maxRegistersPerBlock
+
+
uint32_t  maxSharedMemoryPerBlock
+
+
uint32_t  maxThreadsPerBlock
+
+
uint32_t  maxWarpsPerMultiprocessor
+
+
const + char + * name
+
+
uint32_t  numMemcpyEngines
+
+
uint32_t  numMultiprocessors
+
+
uint32_t  numThreadsPerWarp
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityDevice::computeCapabilityMajor [inherited]
+
+
+

Compute capability for the device, major number.

+
+
+
+ uint32_t CUpti_ActivityDevice::computeCapabilityMinor [inherited]
+
+
+

Compute capability for the device, minor number.

+
+
+
+ uint32_t CUpti_ActivityDevice::constantMemorySize [inherited]
+
+
+

The amount of constant memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice::coreClockRate [inherited]
+
+
+

The core clock rate of the device, in kHz.

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityDevice::flags [inherited]
+
+
+

The flags associated with the device.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityDevice::globalMemoryBandwidth [inherited]
+
+
+

The global memory bandwidth available on the device, in kBytes/sec.

+
+
+
+ uint64_t CUpti_ActivityDevice::globalMemorySize [inherited]
+
+
+

The amount of global memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice::id [inherited]
+
+
+

The device ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityDevice::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE.

+
+
+
+ uint32_t CUpti_ActivityDevice::l2CacheSize [inherited]
+
+
+

The size of the L2 cache on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxBlockDimX [inherited]
+
+
+

Maximum allowed X dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxBlockDimY [inherited]
+
+
+

Maximum allowed Y dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxBlockDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxBlocksPerMultiprocessor [inherited]
+
+
+

Maximum number of blocks that can be present on a multiprocessor at any given time.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxGridDimX [inherited]
+
+
+

Maximum allowed X dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxGridDimY [inherited]
+
+
+

Maximum allowed Y dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxGridDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxIPC [inherited]
+
+
+

The maximum "instructions per cycle" possible on each device multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxRegistersPerBlock [inherited]
+
+
+

Maximum number of registers that can be allocated to a block.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxSharedMemoryPerBlock [inherited]
+
+
+

Maximum amount of shared memory that can be assigned to a block, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxThreadsPerBlock [inherited]
+
+
+

Maximum number of threads allowed in a block.

+
+
+
+ uint32_t CUpti_ActivityDevice::maxWarpsPerMultiprocessor [inherited]
+
+
+

Maximum number of warps that can be present on a multiprocessor at any given time.

+
+
+
+ const + + char + * CUpti_ActivityDevice::name [inherited]
+
+
+

The device name. This name is shared across all activity records representing instances of the device, and so should not + be modified. +

+
+
+
+ uint32_t CUpti_ActivityDevice::numMemcpyEngines [inherited]
+
+
+

Number of memory copy engines on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice::numMultiprocessors [inherited]
+
+
+

Number of multiprocessors on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice::numThreadsPerWarp [inherited]
+
+
+

The number of threads per warp on the device.

+
+
+
+
+
+
+

6.16. CUpti_ActivityDevice2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE). Device activity is now reported + using the CUpti_ActivityDevice4 activity record. +

+
+

Public Variables

+
+
uint32_t  computeCapabilityMajor
+
+
uint32_t  computeCapabilityMinor
+
+
uint32_t  constantMemorySize
+
+
uint32_t  coreClockRate
+
+
uint32_t  eccEnabled
+
+
CUpti_ActivityFlag flags
+
+
uint64_t  globalMemoryBandwidth
+
+
uint64_t  globalMemorySize
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
uint32_t  l2CacheSize
+
+
uint32_t  maxBlockDimX
+
+
uint32_t  maxBlockDimY
+
+
uint32_t  maxBlockDimZ
+
+
uint32_t  maxBlocksPerMultiprocessor
+
+
uint32_t  maxGridDimX
+
+
uint32_t  maxGridDimY
+
+
uint32_t  maxGridDimZ
+
+
uint32_t  maxIPC
+
+
uint32_t  maxRegistersPerBlock
+
+
uint32_t  maxRegistersPerMultiprocessor
+
+
uint32_t  maxSharedMemoryPerBlock
+
+
uint32_t  maxSharedMemoryPerMultiprocessor
+
+
uint32_t  maxThreadsPerBlock
+
+
uint32_t  maxWarpsPerMultiprocessor
+
+
const + char + * name
+
+
uint32_t  numMemcpyEngines
+
+
uint32_t  numMultiprocessors
+
+
uint32_t  numThreadsPerWarp
+
+
uint32_t  pad
+
+
CUuuid  uuid
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityDevice2::computeCapabilityMajor [inherited]
+
+
+

Compute capability for the device, major number.

+
+
+
+ uint32_t CUpti_ActivityDevice2::computeCapabilityMinor [inherited]
+
+
+

Compute capability for the device, minor number.

+
+
+
+ uint32_t CUpti_ActivityDevice2::constantMemorySize [inherited]
+
+
+

The amount of constant memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice2::coreClockRate [inherited]
+
+
+

The core clock rate of the device, in kHz.

+
+
+
+ uint32_t CUpti_ActivityDevice2::eccEnabled [inherited]
+
+
+

ECC enabled flag for device

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityDevice2::flags [inherited]
+
+
+

The flags associated with the device.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityDevice2::globalMemoryBandwidth [inherited]
+
+
+

The global memory bandwidth available on the device, in kBytes/sec.

+
+
+
+ uint64_t CUpti_ActivityDevice2::globalMemorySize [inherited]
+
+
+

The amount of global memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice2::id [inherited]
+
+
+

The device ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityDevice2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE.

+
+
+
+ uint32_t CUpti_ActivityDevice2::l2CacheSize [inherited]
+
+
+

The size of the L2 cache on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxBlockDimX [inherited]
+
+
+

Maximum allowed X dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxBlockDimY [inherited]
+
+
+

Maximum allowed Y dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxBlockDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxBlocksPerMultiprocessor [inherited]
+
+
+

Maximum number of blocks that can be present on a multiprocessor at any given time.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxGridDimX [inherited]
+
+
+

Maximum allowed X dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxGridDimY [inherited]
+
+
+

Maximum allowed Y dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxGridDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxIPC [inherited]
+
+
+

The maximum "instructions per cycle" possible on each device multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxRegistersPerBlock [inherited]
+
+
+

Maximum number of registers that can be allocated to a block.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxRegistersPerMultiprocessor [inherited]
+
+
+

Maximum number of 32-bit registers available per multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxSharedMemoryPerBlock [inherited]
+
+
+

Maximum amount of shared memory that can be assigned to a block, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxSharedMemoryPerMultiprocessor [inherited]
+
+
+

Maximum amount of shared memory available per multiprocessor, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxThreadsPerBlock [inherited]
+
+
+

Maximum number of threads allowed in a block.

+
+
+
+ uint32_t CUpti_ActivityDevice2::maxWarpsPerMultiprocessor [inherited]
+
+
+

Maximum number of warps that can be present on a multiprocessor at any given time.

+
+
+
+ const + + char + * CUpti_ActivityDevice2::name [inherited]
+
+
+

The device name. This name is shared across all activity records representing instances of the device, and so should not + be modified. +

+
+
+
+ uint32_t CUpti_ActivityDevice2::numMemcpyEngines [inherited]
+
+
+

Number of memory copy engines on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice2::numMultiprocessors [inherited]
+
+
+

Number of multiprocessors on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice2::numThreadsPerWarp [inherited]
+
+
+

The number of threads per warp on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUuuid CUpti_ActivityDevice2::uuid [inherited]
+
+
+

The device UUID. This value is the globally unique immutable alphanumeric identifier of the device.

+
+
+
+
+
+
+

6.17. CUpti_ActivityDevice3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE). Device activity is now reported + using the CUpti_ActivityDevice4 activity record. +

+
+

Public Variables

+
+
uint32_t  computeCapabilityMajor
+
+
uint32_t  computeCapabilityMinor
+
+
uint32_t  constantMemorySize
+
+
uint32_t  coreClockRate
+
+
uint32_t  eccEnabled
+
+
CUpti_ActivityFlag flags
+
+
uint64_t  globalMemoryBandwidth
+
+
uint64_t  globalMemorySize
+
+
uint32_t  id
+
+
uint8_t  isCudaVisible
+
+
CUpti_ActivityKind kind
+
+
uint32_t  l2CacheSize
+
+
uint32_t  maxBlockDimX
+
+
uint32_t  maxBlockDimY
+
+
uint32_t  maxBlockDimZ
+
+
uint32_t  maxBlocksPerMultiprocessor
+
+
uint32_t  maxGridDimX
+
+
uint32_t  maxGridDimY
+
+
uint32_t  maxGridDimZ
+
+
uint32_t  maxIPC
+
+
uint32_t  maxRegistersPerBlock
+
+
uint32_t  maxRegistersPerMultiprocessor
+
+
uint32_t  maxSharedMemoryPerBlock
+
+
uint32_t  maxSharedMemoryPerMultiprocessor
+
+
uint32_t  maxThreadsPerBlock
+
+
uint32_t  maxWarpsPerMultiprocessor
+
+
const + char + * name
+
+
uint32_t  numMemcpyEngines
+
+
uint32_t  numMultiprocessors
+
+
uint32_t  numThreadsPerWarp
+
+
uint32_t  pad
+
+
CUuuid  uuid
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityDevice3::computeCapabilityMajor [inherited]
+
+
+

Compute capability for the device, major number.

+
+
+
+ uint32_t CUpti_ActivityDevice3::computeCapabilityMinor [inherited]
+
+
+

Compute capability for the device, minor number.

+
+
+
+ uint32_t CUpti_ActivityDevice3::constantMemorySize [inherited]
+
+
+

The amount of constant memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice3::coreClockRate [inherited]
+
+
+

The core clock rate of the device, in kHz.

+
+
+
+ uint32_t CUpti_ActivityDevice3::eccEnabled [inherited]
+
+
+

ECC enabled flag for device

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityDevice3::flags [inherited]
+
+
+

The flags associated with the device.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityDevice3::globalMemoryBandwidth [inherited]
+
+
+

The global memory bandwidth available on the device, in kBytes/sec.

+
+
+
+ uint64_t CUpti_ActivityDevice3::globalMemorySize [inherited]
+
+
+

The amount of global memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice3::id [inherited]
+
+
+

The device ID.

+
+
+
+ uint8_t CUpti_ActivityDevice3::isCudaVisible [inherited]
+
+
+

Flag to indicate whether the device is visible to CUDA. Users can set the device visibility using the CUDA_VISIBLE_DEVICES environment variable. + +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityDevice3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE.

+
+
+
+ uint32_t CUpti_ActivityDevice3::l2CacheSize [inherited]
+
+
+

The size of the L2 cache on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxBlockDimX [inherited]
+
+
+

Maximum allowed X dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxBlockDimY [inherited]
+
+
+

Maximum allowed Y dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxBlockDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxBlocksPerMultiprocessor [inherited]
+
+
+

Maximum number of blocks that can be present on a multiprocessor at any given time.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxGridDimX [inherited]
+
+
+

Maximum allowed X dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxGridDimY [inherited]
+
+
+

Maximum allowed Y dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxGridDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxIPC [inherited]
+
+
+

The maximum "instructions per cycle" possible on each device multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxRegistersPerBlock [inherited]
+
+
+

Maximum number of registers that can be allocated to a block.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxRegistersPerMultiprocessor [inherited]
+
+
+

Maximum number of 32-bit registers available per multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxSharedMemoryPerBlock [inherited]
+
+
+

Maximum amount of shared memory that can be assigned to a block, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxSharedMemoryPerMultiprocessor [inherited]
+
+
+

Maximum amount of shared memory available per multiprocessor, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxThreadsPerBlock [inherited]
+
+
+

Maximum number of threads allowed in a block.

+
+
+
+ uint32_t CUpti_ActivityDevice3::maxWarpsPerMultiprocessor [inherited]
+
+
+

Maximum number of warps that can be present on a multiprocessor at any given time.

+
+
+
+ const + + char + * CUpti_ActivityDevice3::name [inherited]
+
+
+

The device name. This name is shared across all activity records representing instances of the device, and so should not + be modified. +

+
+
+
+ uint32_t CUpti_ActivityDevice3::numMemcpyEngines [inherited]
+
+
+

Number of memory copy engines on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice3::numMultiprocessors [inherited]
+
+
+

Number of multiprocessors on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice3::numThreadsPerWarp [inherited]
+
+
+

The number of threads per warp on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice3::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUuuid CUpti_ActivityDevice3::uuid [inherited]
+
+
+

The device UUID. This value is the globally unique immutable alphanumeric identifier of the device.

+
+
+
+
+
+
+

6.18. CUpti_ActivityDevice4 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE).

+
+

Public Variables

+
+
uint32_t  computeCapabilityMajor
+
+
uint32_t  computeCapabilityMinor
+
+
uint32_t  computeInstanceId
+
+
uint32_t  constantMemorySize
+
+
uint32_t  coreClockRate
+
+
uint32_t  eccEnabled
+
+
CUpti_ActivityFlag flags
+
+
uint64_t  globalMemoryBandwidth
+
+
uint64_t  globalMemorySize
+
+
uint32_t  gpuInstanceId
+
+
uint32_t  id
+
+
uint8_t  isCudaVisible
+
+
uint8_t  isMigEnabled
+
+
CUpti_ActivityKind kind
+
+
uint32_t  l2CacheSize
+
+
uint32_t  maxBlockDimX
+
+
uint32_t  maxBlockDimY
+
+
uint32_t  maxBlockDimZ
+
+
uint32_t  maxBlocksPerMultiprocessor
+
+
uint32_t  maxGridDimX
+
+
uint32_t  maxGridDimY
+
+
uint32_t  maxGridDimZ
+
+
uint32_t  maxIPC
+
+
uint32_t  maxRegistersPerBlock
+
+
uint32_t  maxRegistersPerMultiprocessor
+
+
uint32_t  maxSharedMemoryPerBlock
+
+
uint32_t  maxSharedMemoryPerMultiprocessor
+
+
uint32_t  maxThreadsPerBlock
+
+
uint32_t  maxWarpsPerMultiprocessor
+
+
CUuuid  migUuid
+
+
const + char + * name
+
+
uint32_t  numMemcpyEngines
+
+
uint32_t  numMultiprocessors
+
+
uint32_t  numThreadsPerWarp
+
+
uint32_t  pad
+
+
CUuuid  uuid
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityDevice4::computeCapabilityMajor [inherited]
+
+
+

Compute capability for the device, major number.

+
+
+
+ uint32_t CUpti_ActivityDevice4::computeCapabilityMinor [inherited]
+
+
+

Compute capability for the device, minor number.

+
+
+
+ uint32_t CUpti_ActivityDevice4::computeInstanceId [inherited]
+
+
+

Compute instance ID for MIG-enabled devices. If MIG mode is disabled, the value is set to UINT32_MAX.

+
+
+
+ uint32_t CUpti_ActivityDevice4::constantMemorySize [inherited]
+
+
+

The amount of constant memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice4::coreClockRate [inherited]
+
+
+

The core clock rate of the device, in kHz.

+
+
+
+ uint32_t CUpti_ActivityDevice4::eccEnabled [inherited]
+
+
+

ECC enabled flag for device

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityDevice4::flags [inherited]
+
+
+

The flags associated with the device.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityDevice4::globalMemoryBandwidth [inherited]
+
+
+

The global memory bandwidth available on the device, in kBytes/sec.

+
+
+
+ uint64_t CUpti_ActivityDevice4::globalMemorySize [inherited]
+
+
+

The amount of global memory on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice4::gpuInstanceId [inherited]
+
+
+

GPU instance ID for MIG-enabled devices. If MIG mode is disabled, the value is set to UINT32_MAX.

+
+
+
+ uint32_t CUpti_ActivityDevice4::id [inherited]
+
+
+

The device ID.

+
+
+
+ uint8_t CUpti_ActivityDevice4::isCudaVisible [inherited]
+
+
+

Flag to indicate whether the device is visible to CUDA. Users can set the device visibility using the CUDA_VISIBLE_DEVICES environment variable. + +

+
+
+
+ uint8_t CUpti_ActivityDevice4::isMigEnabled [inherited]
+
+
+

MIG enabled flag for device

+
+
+
+ CUpti_ActivityKind CUpti_ActivityDevice4::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE.

+
+
+
+ uint32_t CUpti_ActivityDevice4::l2CacheSize [inherited]
+
+
+

The size of the L2 cache on the device, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxBlockDimX [inherited]
+
+
+

Maximum allowed X dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxBlockDimY [inherited]
+
+
+

Maximum allowed Y dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxBlockDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a block.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxBlocksPerMultiprocessor [inherited]
+
+
+

Maximum number of blocks that can be present on a multiprocessor at any given time.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxGridDimX [inherited]
+
+
+

Maximum allowed X dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxGridDimY [inherited]
+
+
+

Maximum allowed Y dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxGridDimZ [inherited]
+
+
+

Maximum allowed Z dimension for a grid.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxIPC [inherited]
+
+
+

The maximum "instructions per cycle" possible on each device multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxRegistersPerBlock [inherited]
+
+
+

Maximum number of registers that can be allocated to a block.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxRegistersPerMultiprocessor [inherited]
+
+
+

Maximum number of 32-bit registers available per multiprocessor.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxSharedMemoryPerBlock [inherited]
+
+
+

Maximum amount of shared memory that can be assigned to a block, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxSharedMemoryPerMultiprocessor [inherited]
+
+
+

Maximum amount of shared memory available per multiprocessor, in bytes.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxThreadsPerBlock [inherited]
+
+
+

Maximum number of threads allowed in a block.

+
+
+
+ uint32_t CUpti_ActivityDevice4::maxWarpsPerMultiprocessor [inherited]
+
+
+

Maximum number of warps that can be present on a multiprocessor at any given time.

+
+
+
+ CUuuid CUpti_ActivityDevice4::migUuid [inherited]
+
+
+

The MIG UUID. This value is the globally unique immutable alphanumeric identifier of the device.

+
+
+
+ const + + char + * CUpti_ActivityDevice4::name [inherited]
+
+
+

The device name. This name is shared across all activity records representing instances of the device, and so should not + be modified. +

+
+
+
+ uint32_t CUpti_ActivityDevice4::numMemcpyEngines [inherited]
+
+
+

Number of memory copy engines on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice4::numMultiprocessors [inherited]
+
+
+

Number of multiprocessors on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice4::numThreadsPerWarp [inherited]
+
+
+

The number of threads per warp on the device.

+
+
+
+ uint32_t CUpti_ActivityDevice4::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUuuid CUpti_ActivityDevice4::uuid [inherited]
+
+
+

The device UUID. This value is the globally unique immutable alphanumeric identifier of the device.

+
+
+
+
+
+
+

6.19. CUpti_ActivityDeviceAttribute Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents information about a GPU device: either a CUpti_DeviceAttribute or CUdevice_attribute value + (CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE). +

+
+

Public Variables

+
+
CUpti_ActivityDeviceAttribute::@21  attribute
+
+
uint32_t  deviceId
+
+
CUpti_ActivityFlag flags
+
+
CUpti_ActivityKind kind
+
+
CUpti_ActivityDeviceAttribute::@22  value
+
+
+
+

Variables

+
+
+ CUpti_ActivityDeviceAttribute::@21 CUpti_ActivityDeviceAttribute::attribute [inherited]
+
+
+

The attribute, either a CUpti_DeviceAttribute or CUdevice_attribute. Flag CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE is + used to indicate what kind of attribute this is. If CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE is 1 then CUdevice_attribute + field is valid, otherwise CUpti_DeviceAttribute field is valid. +

+
+
+
+ uint32_t CUpti_ActivityDeviceAttribute::deviceId [inherited]
+
+
+

The ID of the device that this attribute applies to.

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityDeviceAttribute::flags [inherited]
+
+
+

The flags associated with the device.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_ActivityKind CUpti_ActivityDeviceAttribute::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE.

+
+
+
+ CUpti_ActivityDeviceAttribute::@22 CUpti_ActivityDeviceAttribute::value [inherited]
+
+
+

The value for the attribute. See CUpti_DeviceAttribute and CUdevice_attribute for the type of the value for a given attribute. + +

+
+
+
+
+
+
+

6.20. CUpti_ActivityEnvironment Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record provides CUPTI environmental data, including power, clocks, and thermals. This information is sampled at + various rates and returned in this activity record. The consumer of the record needs to check the environmentKind field to + figure out what kind of environmental record this is. +

+
+

Public Variables

+
+
CUpti_EnvironmentClocksThrottleReason clocksThrottleReasons
+
+
CUpti_ActivityEnvironment::@23::@27  cooling
+
+
uint32_t  deviceId
+
+
CUpti_ActivityEnvironmentKind environmentKind
+
+
uint32_t  fanSpeed
+
+
uint32_t  gpuTemperature
+
+
CUpti_ActivityKind kind
+
+
uint32_t  memoryClock
+
+
uint32_t  pcieLinkGen
+
+
uint32_t  pcieLinkWidth
+
+
CUpti_ActivityEnvironment::@23::@26  power
+
+
uint32_t  power
+
+
uint32_t  powerLimit
+
+
uint32_t  smClock
+
+
CUpti_ActivityEnvironment::@23::@24  speed
+
+
CUpti_ActivityEnvironment::@23::@25  temperature
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ CUpti_EnvironmentClocksThrottleReason CUpti_ActivityEnvironment::clocksThrottleReasons [inherited]
+
+
+

The clocks throttle reasons.

+
+
+
+ CUpti_ActivityEnvironment::@23::@27 CUpti_ActivityEnvironment::cooling [inherited]
+
+
+

Data returned for CUPTI_ACTIVITY_ENVIRONMENT_COOLING environment kind.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::deviceId [inherited]
+
+
+

The ID of the device

+
+
+
+ CUpti_ActivityEnvironmentKind CUpti_ActivityEnvironment::environmentKind [inherited]
+
+
+

The kind of data reported in this record.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::fanSpeed [inherited]
+
+
+

The fan speed as percentage of maximum.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::gpuTemperature [inherited]
+
+
+

The GPU temperature in degrees C.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityEnvironment::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_ENVIRONMENT.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::memoryClock [inherited]
+
+
+

The memory frequency in MHz

+
+
+
+ uint32_t CUpti_ActivityEnvironment::pcieLinkGen [inherited]
+
+
+

The PCIe link generation.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::pcieLinkWidth [inherited]
+
+
+

The PCIe link width.

+
+
+
+ CUpti_ActivityEnvironment::@23::@26 CUpti_ActivityEnvironment::power [inherited]
+
+
+

Data returned for CUPTI_ACTIVITY_ENVIRONMENT_POWER environment kind.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::power [inherited]
+
+
+

The power in milliwatts consumed by GPU and associated circuitry.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::powerLimit [inherited]
+
+
+

The power in milliwatts that will trigger power management algorithm.

+
+
+
+ uint32_t CUpti_ActivityEnvironment::smClock [inherited]
+
+
+

The SM frequency in MHz

+
+
+
+ CUpti_ActivityEnvironment::@23::@24 CUpti_ActivityEnvironment::speed [inherited]
+
+
+

Data returned for CUPTI_ACTIVITY_ENVIRONMENT_SPEED environment kind.

+
+
+
+ CUpti_ActivityEnvironment::@23::@25 CUpti_ActivityEnvironment::temperature [inherited]
+
+
+

Data returned for CUPTI_ACTIVITY_ENVIRONMENT_TEMPERATURE environment kind.

+
+
+
+ uint64_t CUpti_ActivityEnvironment::timestamp [inherited]
+
+
+

The timestamp when this sample was retrieved, in ns. A value of 0 indicates that timestamp information could not be collected + for the marker. +

+
+
+
+
+
+
+

6.21. CUpti_ActivityEvent Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUPTI event value (CUPTI_ACTIVITY_KIND_EVENT). This activity record kind is not produced + by the activity API but is included for completeness and ease-of-use. Profiler frameworks built on top of CUPTI that collect + event data may choose to use this type to store the collected event data. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
CUpti_EventDomainID domain
+
+
CUpti_EventID id
+
+
CUpti_ActivityKind kind
+
+
uint64_t  value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityEvent::correlationId [inherited]
+
+
+

The correlation ID of the event. Use of this ID is user-defined, but typically this ID value will equal the correlation ID + of the kernel for which the event was gathered. +

+
+
+
+ CUpti_EventDomainIDCUpti_ActivityEvent::domain [inherited]
+
+
+

The event domain ID.

+
+
+
+ CUpti_EventIDCUpti_ActivityEvent::id [inherited]
+
+
+

The event ID.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityEvent::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_EVENT.

+
+
+
+ uint64_t CUpti_ActivityEvent::value [inherited]
+
+
+

The event value.

+
+
+
+
+
+
+

6.22. CUpti_ActivityEventInstance Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUPTI event value for a specific event domain instance (CUPTI_ACTIVITY_KIND_EVENT_INSTANCE). + This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks + built on top of CUPTI that collect event data may choose to use this type to store the collected event data. This activity + record should be used when event domain instance information needs to be associated with the event. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
CUpti_EventDomainID domain
+
+
CUpti_EventID id
+
+
uint32_t  instance
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint64_t  value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityEventInstance::correlationId [inherited]
+
+
+

The correlation ID of the event. Use of this ID is user-defined, but typically this ID value will equal the correlation ID + of the kernel for which the event was gathered. +

+
+
+
+ CUpti_EventDomainIDCUpti_ActivityEventInstance::domain [inherited]
+
+
+

The event domain ID.

+
+
+
+ CUpti_EventIDCUpti_ActivityEventInstance::id [inherited]
+
+
+

The event ID.

+
+
+
+ uint32_t CUpti_ActivityEventInstance::instance [inherited]
+
+
+

The event domain instance.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityEventInstance::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_EVENT_INSTANCE.

+
+
+
+ uint32_t CUpti_ActivityEventInstance::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityEventInstance::value [inherited]
+
+
+

The event value.

+
+
+
+
+
+
+

6.23. CUpti_ActivityExternalCorrelation Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record correlates native CUDA records (e.g. CUDA Driver API, kernels, memcpys, ...) with records from external + APIs such as OpenACC. (CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION). +

+

See also:

+

CUpti_ActivityKind

+

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint64_t  externalId
+
+
CUpti_ExternalCorrelationKind externalKind
+
+
CUpti_ActivityKind kind
+
+
uint32_t  reserved
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityExternalCorrelation::correlationId [inherited]
+
+
+

The correlation ID of the associated CUDA driver or runtime API record.

+
+
+
+ uint64_t CUpti_ActivityExternalCorrelation::externalId [inherited]
+
+
+

The correlation ID of the associated non-CUDA API record. The exact field in the associated external record depends on that + record's activity kind (see also externalKind). +

+

+
+
+
+ CUpti_ExternalCorrelationKindCUpti_ActivityExternalCorrelation::externalKind [inherited]
+
+
+

The kind of external API this record correlated to.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityExternalCorrelation::kind [inherited]
+
+
+

The kind of this activity.

+
+
+
+ uint32_t CUpti_ActivityExternalCorrelation::reserved [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+
+
+
+

6.24. CUpti_ActivityFunction Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records function name and corresponding module information. (CUPTI_ACTIVITY_KIND_FUNCTION).

+
+

Public Variables

+
+
uint32_t  contextId
+
+
uint32_t  functionIndex
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
uint32_t  moduleId
+
+
const + char + * name
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityFunction::contextId [inherited]
+
+
+

The ID of the context where the function is launched.

+
+
+
+ uint32_t CUpti_ActivityFunction::functionIndex [inherited]
+
+
+

The function's unique symbol index in the module.

+
+
+
+ uint32_t CUpti_ActivityFunction::id [inherited]
+
+
+

ID to uniquely identify the record

+
+
+
+ CUpti_ActivityKindCUpti_ActivityFunction::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_FUNCTION.

+
+
+
+ uint32_t CUpti_ActivityFunction::moduleId [inherited]
+
+
+

The module ID in which this global/device function is present.

+
+
+
+ const + + char + * CUpti_ActivityFunction::name [inherited]
+
+
+

The name of the function. This name is shared across all activity records representing the same kernel, and so should not + be modified. +

+
+
+
+
+
+
+

6.25. CUpti_ActivityGlobalAccess Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records the locations of the global accesses in the source (CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS). Global access + activities are now reported using the CUpti_ActivityGlobalAccess3 activity record. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  executed
+
+
CUpti_ActivityFlag flags
+
+
CUpti_ActivityKind kind
+
+
uint64_t  l2_transactions
+
+
uint32_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityGlobalAccess::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It will be incremented when at least one thread in the warp + is active with predicate and condition code evaluating to true. +

+
+
+
+ CUpti_ActivityFlagCUpti_ActivityGlobalAccess::flags [inherited]
+
+
+

The properties of this global access.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityGlobalAccess::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess::l2_transactions [inherited]
+
+
+

The total number of 32-byte transactions to L2 cache generated by this access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess::pcOffset [inherited]
+
+
+

The pc offset for the access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess::threadsExecuted [inherited]
+
+
+

This is incremented, each time this instruction is executed, by the number of threads that executed it with predicate + and condition code evaluating to true. +

+
+
+
+
+
+
+

6.26. CUpti_ActivityGlobalAccess2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records the locations of the global accesses in the source (CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS). Global access + activities are now reported using the CUpti_ActivityGlobalAccess3 activity record. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  executed
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint64_t  l2_transactions
+
+
uint32_t  pad
+
+
uint32_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  theoreticalL2Transactions
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityGlobalAccess2::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess2::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It will be incremented when at least one thread in the warp + is active with predicate and condition code evaluating to true. +

+
+
+
+ CUpti_ActivityFlagCUpti_ActivityGlobalAccess2::flags [inherited]
+
+
+

The properties of this global access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess2::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivityGlobalAccess2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess2::l2_transactions [inherited]
+
+
+

The total number of 32-byte transactions to L2 cache generated by this access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess2::pcOffset [inherited]
+
+
+

The pc offset for the access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess2::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess2::theoreticalL2Transactions [inherited]
+
+
+

The minimum number of L2 transactions possible based on the access pattern.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess2::threadsExecuted [inherited]
+
+
+

This is incremented, each time this instruction is executed, by the number of threads that executed it with predicate + and condition code evaluating to true. +

+
+
+
+
+
+
+

6.27. CUpti_ActivityGlobalAccess3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records the locations of the global accesses in the source (CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS).

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  executed
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint64_t  l2_transactions
+
+
uint64_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  theoreticalL2Transactions
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityGlobalAccess3::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess3::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It will be incremented when at least one thread in the warp + is active with predicate and condition code evaluating to true. +

+
+
+
+ CUpti_ActivityFlagCUpti_ActivityGlobalAccess3::flags [inherited]
+
+
+

The properties of this global access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess3::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivityGlobalAccess3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess3::l2_transactions [inherited]
+
+
+

The total number of 32-byte transactions to L2 cache generated by this access.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess3::pcOffset [inherited]
+
+
+

The pc offset for the access.

+
+
+
+ uint32_t CUpti_ActivityGlobalAccess3::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess3::theoreticalL2Transactions [inherited]
+
+
+

The minimum number of L2 transactions possible based on the access pattern.

+
+
+
+ uint64_t CUpti_ActivityGlobalAccess3::threadsExecuted [inherited]
+
+
+

This is incremented, each time this instruction is executed, by the number of threads that executed it with predicate + and condition code evaluating to true. +

+
+
+
+
+
+
+

6.28. CUpti_ActivityGraphTrace Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents execution for a graph without giving visibility about the execution of its nodes. This is + intended to reduce overheads in tracing each node. The activity kind is CUPTI_ACTIVITY_KIND_GRAPH_TRACE +

+
+

Public Variables

+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
uint32_t  graphId
+
+
CUpti_ActivityKind kind
+
+
+ void + * reserved
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityGraphTrace::contextId [inherited]
+
+
+

The ID of the context where the graph is being launched.

+
+
+
+ uint32_t CUpti_ActivityGraphTrace::correlationId [inherited]
+
+
+

The correlation ID of the graph launch. Each graph launch is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the graph. +

+
+
+
+ uint32_t CUpti_ActivityGraphTrace::deviceId [inherited]
+
+
+

The ID of the device where the graph execution is occurring.

+
+
+
+ uint64_t CUpti_ActivityGraphTrace::end [inherited]
+
+
+

The end timestamp for the graph execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the graph. +

+
+
+
+ uint32_t CUpti_ActivityGraphTrace::graphId [inherited]
+
+
+

The unique ID of the graph that is launched.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityGraphTrace::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_GRAPH_TRACE

+
+
+
+ + + void + * CUpti_ActivityGraphTrace::reserved [inherited]
+
+
+

This field is reserved for internal use

+
+
+
+ uint64_t CUpti_ActivityGraphTrace::start [inherited]
+
+
+

The start timestamp for the graph execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the graph. +

+
+
+
+ uint32_t CUpti_ActivityGraphTrace::streamId [inherited]
+
+
+

The ID of the stream where the graph is being launched.

+
+
+
+
+
+
+

6.29. CUpti_ActivityInstantaneousEvent Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUPTI event value (CUPTI_ACTIVITY_KIND_EVENT) sampled at a particular instant. This activity + record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks built + on top of CUPTI that collect event data at a particular time may choose to use this type to store the collected event data. + +

+
+

Public Variables

+
+
uint32_t  deviceId
+
+
CUpti_EventID id
+
+
CUpti_ActivityKind kind
+
+
uint32_t  reserved
+
+
uint64_t  timestamp
+
+
uint64_t  value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityInstantaneousEvent::deviceId [inherited]
+
+
+

The device id

+
+
+
+ CUpti_EventIDCUpti_ActivityInstantaneousEvent::id [inherited]
+
+
+

The event ID.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityInstantaneousEvent::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT.

+
+
+
+ uint32_t CUpti_ActivityInstantaneousEvent::reserved [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityInstantaneousEvent::timestamp [inherited]
+
+
+

The timestamp at which event is sampled

+
+
+
+ uint64_t CUpti_ActivityInstantaneousEvent::value [inherited]
+
+
+

The event value.

+
+
+
+
+
+
+

6.30. CUpti_ActivityInstantaneousEventInstance Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUPTI event value for a specific event domain instance (CUPTI_ACTIVITY_KIND_EVENT_INSTANCE) + sampled at a particular instant. This activity record kind is not produced by the activity API but is included for completeness + and ease-of-use. Profiler frameworks built on top of CUPTI that collect event data may choose to use this type to store the + collected event data. This activity record should be used when event domain instance information needs to be associated with + the event. +

+
+

Public Variables

+
+
uint32_t  deviceId
+
+
CUpti_EventID id
+
+
uint8_t  instance
+
+
CUpti_ActivityKind kind
+
+
uint8_t  pad[3]
+
+
uint64_t  timestamp
+
+
uint64_t  value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityInstantaneousEventInstance::deviceId [inherited]
+
+
+

The device id

+
+
+
+ CUpti_EventIDCUpti_ActivityInstantaneousEventInstance::id [inherited]
+
+
+

The event ID.

+
+
+
+ uint8_t CUpti_ActivityInstantaneousEventInstance::instance [inherited]
+
+
+

The event domain instance

+
+
+
+ CUpti_ActivityKindCUpti_ActivityInstantaneousEventInstance::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE.

+
+
+
+ uint8_t CUpti_ActivityInstantaneousEventInstance::pad[3] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityInstantaneousEventInstance::timestamp [inherited]
+
+
+

The timestamp at which event is sampled

+
+
+
+ uint64_t CUpti_ActivityInstantaneousEventInstance::value [inherited]
+
+
+

The event value.

+
+
+
+
+
+
+

6.31. CUpti_ActivityInstantaneousMetric Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents the collection of a CUPTI metric value (CUPTI_ACTIVITY_KIND_METRIC) at a particular instant. + This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks + built on top of CUPTI that collect metric data may choose to use this type to store the collected metric data. +

+
+

Public Variables

+
+
uint32_t  deviceId
+
+
uint8_t  flags
+
+
CUpti_MetricID id
+
+
CUpti_ActivityKind kind
+
+
uint8_t  pad[3]
+
+
uint64_t  timestamp
+
+
union CUpti_MetricValue value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityInstantaneousMetric::deviceId [inherited]
+
+
+

The device id

+
+
+
+ uint8_t CUpti_ActivityInstantaneousMetric::flags [inherited]
+
+
+

The properties of this metric.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_MetricIDCUpti_ActivityInstantaneousMetric::id [inherited]
+
+
+

The metric ID.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityInstantaneousMetric::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC.

+
+
+
+ uint8_t CUpti_ActivityInstantaneousMetric::pad[3] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityInstantaneousMetric::timestamp [inherited]
+
+
+

The timestamp at which metric is sampled

+
+
+
+ union CUpti_MetricValueCUpti_ActivityInstantaneousMetric::value [inherited]
+
+
+

The metric value.

+
+
+
+
+
+
+

6.32. CUpti_ActivityInstantaneousMetricInstance Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUPTI metric value for a specific metric domain instance (CUPTI_ACTIVITY_KIND_METRIC_INSTANCE) + sampled at a particular time. This activity record kind is not produced by the activity API but is included for completeness + and ease-of-use. Profiler frameworks built on top of CUPTI that collect metric data may choose to use this type to store the + collected metric data. This activity record should be used when metric domain instance information needs to be associated + with the metric. +

+
+

Public Variables

+
+
uint32_t  deviceId
+
+
uint8_t  flags
+
+
CUpti_MetricID id
+
+
uint8_t  instance
+
+
CUpti_ActivityKind kind
+
+
uint8_t  pad[2]
+
+
uint64_t  timestamp
+
+
union CUpti_MetricValue value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityInstantaneousMetricInstance::deviceId [inherited]
+
+
+

The device id

+
+
+
+ uint8_t CUpti_ActivityInstantaneousMetricInstance::flags [inherited]
+
+
+

The properties of this metric.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_MetricIDCUpti_ActivityInstantaneousMetricInstance::id [inherited]
+
+
+

The metric ID.

+
+
+
+ uint8_t CUpti_ActivityInstantaneousMetricInstance::instance [inherited]
+
+
+

The metric domain instance

+
+
+
+ CUpti_ActivityKindCUpti_ActivityInstantaneousMetricInstance::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE.

+
+
+
+ uint8_t CUpti_ActivityInstantaneousMetricInstance::pad[2] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityInstantaneousMetricInstance::timestamp [inherited]
+
+
+

The timestamp at which metric is sampled

+
+
+
+ union CUpti_MetricValueCUpti_ActivityInstantaneousMetricInstance::value [inherited]
+
+
+

The metric value.

+
+
+
+
+
+
+

6.33. CUpti_ActivityInstructionCorrelation Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records source level sass/source correlation information. (CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION).

+
+

Public Variables

+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
+
+

Variables

+
+
+ CUpti_ActivityFlagCUpti_ActivityInstructionCorrelation::flags [inherited]
+
+
+

The properties of this instruction.

+
+
+
+ uint32_t CUpti_ActivityInstructionCorrelation::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivityInstructionCorrelation::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION.

+
+
+
+ uint32_t CUpti_ActivityInstructionCorrelation::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityInstructionCorrelation::pcOffset [inherited]
+
+
+

The pc offset for the instruction.

+
+
+
+ uint32_t CUpti_ActivityInstructionCorrelation::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+
+
+
+

6.34. CUpti_ActivityInstructionExecution Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records result for source level instruction execution. (CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION).

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  executed
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint64_t  notPredOffThreadsExecuted
+
+
uint32_t  pad
+
+
uint32_t  pcOffset
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityInstructionExecution::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivityInstructionExecution::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It will be incremented regardless of predicate or condition code. + +

+
+
+
+ CUpti_ActivityFlagCUpti_ActivityInstructionExecution::flags [inherited]
+
+
+

The properties of this instruction execution.

+
+
+
+ uint32_t CUpti_ActivityInstructionExecution::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivityInstructionExecution::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION.

+
+
+
+ uint64_t CUpti_ActivityInstructionExecution::notPredOffThreadsExecuted [inherited]
+
+
+

This is incremented, each time this instruction is executed, by the number of threads that executed it with predicate + and condition code evaluating to true. +

+
+
+
+ uint32_t CUpti_ActivityInstructionExecution::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityInstructionExecution::pcOffset [inherited]
+
+
+

The pc offset for the instruction.

+
+
+
+ uint32_t CUpti_ActivityInstructionExecution::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivityInstructionExecution::threadsExecuted [inherited]
+
+
+

This is incremented, each time this instruction is executed, by the number of threads that executed it, regardless + of predicate or condition code. +

+
+
+
+
+
+
+

6.35. CUpti_ActivityJit Struct Reference

+

[CUPTI Activity API] +

+
+

+
+

Public Variables

+
+
const + char + * cachePath
+
+
uint64_t  cacheSize
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
CUpti_ActivityJitEntryType jitEntryType
+
+
uint64_t  jitOperationCorrelationId
+
+
CUpti_ActivityJitOperationType jitOperationType
+
+
CUpti_ActivityKind kind
+
+
uint32_t  padding
+
+
uint64_t  start
+
+
+
+

Variables

+
+
+ const + + char + * CUpti_ActivityJit::cachePath [inherited]
+
+
+

The path where the fat binary is cached.

+
+
+
+ uint64_t CUpti_ActivityJit::cacheSize [inherited]
+
+
+

The size of compute cache.

+
+
+
+ uint32_t CUpti_ActivityJit::correlationId [inherited]
+
+
+

The correlation ID of the JIT operation to which the records belong. Each JIT operation is assigned a unique correlation ID + that is identical to the correlation ID in the driver or runtime API activity record that launched the JIT operation. +

+
+
+
+ uint32_t CUpti_ActivityJit::deviceId [inherited]
+
+
+

The device ID.

+
+
+
+ uint64_t CUpti_ActivityJit::end [inherited]
+
+
+

The end timestamp for the JIT operation, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the JIT operation. +

+
+
+
+ CUpti_ActivityJitEntryTypeCUpti_ActivityJit::jitEntryType [inherited]
+
+
+

The JIT entry type.

+
+
+
+ uint64_t CUpti_ActivityJit::jitOperationCorrelationId [inherited]
+
+
+

The correlation ID to correlate JIT compilation, load and store operations. Each JIT compilation unit is assigned a unique + correlation ID at the time of the JIT compilation. This correlation id can be used to find the matching JIT cache load/store + records. +

+
+
+
+ CUpti_ActivityJitOperationTypeCUpti_ActivityJit::jitOperationType [inherited]
+
+
+

The JIT operation type.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityJit::kind [inherited]
+
+
+

The activity record kind must be CUPTI_ACTIVITY_KIND_JIT.

+
+
+
+ uint32_t CUpti_ActivityJit::padding [inherited]
+
+
+

Internal use.

+
+
+
+ uint64_t CUpti_ActivityJit::start [inherited]
+
+
+

The start timestamp for the JIT operation, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the JIT operation. +

+
+
+
+
+
+
+

6.36. CUpti_ActivityKernel Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) + but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
uint8_t  cacheConfigExecuted
+
+
uint8_t  cacheConfigRequested
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
CUpti_ActivityKind kind
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
uint32_t  pad
+
+
uint16_t  registersPerThread
+
+
+ void + * reserved0
+
+
uint32_t  runtimeCorrelationId
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel::cacheConfigExecuted [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint8_t CUpti_ActivityKernel::cacheConfigRequested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint32_t CUpti_ActivityKernel::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityKernel::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint32_t CUpti_ActivityKernel::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ uint32_t CUpti_ActivityKernel::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint16_t CUpti_ActivityKernel::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ + + void + * CUpti_ActivityKernel::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityKernel::runtimeCorrelationId [inherited]
+
+
+

The runtime correlation ID of the kernel. Each kernel execution is assigned a unique runtime correlation ID that is identical + to the correlation ID in the runtime API activity record that launched the kernel. +

+
+
+
+ uint64_t CUpti_ActivityKernel::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+
+
+
+

6.37. CUpti_ActivityKernel2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) + but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
CUpti_ActivityKind kind
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel2::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel2::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel2::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ uint64_t CUpti_ActivityKernel2::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel2::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel2::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel2::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel2::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel2::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel2::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ int64_t CUpti_ActivityKernel2::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel2::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel2::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel2::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityKernel2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint32_t CUpti_ActivityKernel2::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel2::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel2::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ uint16_t CUpti_ActivityKernel2::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel2::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel2::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel2::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint64_t CUpti_ActivityKernel2::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel2::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel2::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+
+
+
+

6.38. CUpti_ActivityKernel3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL). + Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
CUpti_ActivityKind kind
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel3::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel3::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel3::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ uint64_t CUpti_ActivityKernel3::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel3::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel3::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel3::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel3::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel3::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel3::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ int64_t CUpti_ActivityKernel3::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel3::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel3::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel3::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityKernel3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint32_t CUpti_ActivityKernel3::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel3::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel3::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfig CUpti_ActivityKernel3::partitionedGlobalCacheExecuted [inherited]
+
+
+

The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy + requirement of the launch cannot support caching. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfig CUpti_ActivityKernel3::partitionedGlobalCacheRequested [inherited]
+
+
+

The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. +

+
+
+
+ uint16_t CUpti_ActivityKernel3::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel3::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel3::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel3::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint64_t CUpti_ActivityKernel3::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel3::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel3::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+
+
+
+

6.39. CUpti_ActivityKernel4 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL). + Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
CUpti_ActivityKernel4::@9  cacheConfig
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
uint8_t  isSharedMemoryCarveoutRequested
+
+
CUpti_ActivityKind kind
+
+
uint8_t  launchType
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
uint8_t  padding
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
+
+
uint64_t  queued
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryCarveoutRequested
+
+
uint8_t  sharedMemoryConfig
+
+
uint32_t  sharedMemoryExecuted
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
uint64_t  submitted
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel4::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel4::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel4::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ CUpti_ActivityKernel4::@9 CUpti_ActivityKernel4::cacheConfig [inherited]
+
+
+

For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested + is set +

+
+
+
+ uint64_t CUpti_ActivityKernel4::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel4::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel4::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel4::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel4::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel4::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel4::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ int64_t CUpti_ActivityKernel4::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel4::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel4::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel4::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel4::isSharedMemoryCarveoutRequested [inherited]
+
+
+

This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch

+
+
+
+ CUpti_ActivityKind CUpti_ActivityKernel4::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint8_t CUpti_ActivityKernel4::launchType [inherited]
+
+
+

This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

+

See also:

+

CUpti_ActivityLaunchType

+

+
+
+
+ uint32_t CUpti_ActivityKernel4::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel4::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel4::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ uint8_t CUpti_ActivityKernel4::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfig CUpti_ActivityKernel4::partitionedGlobalCacheExecuted [inherited]
+
+
+

The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy + requirement of the launch cannot support caching. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfig CUpti_ActivityKernel4::partitionedGlobalCacheRequested [inherited]
+
+
+

The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. +

+
+
+
+ uint64_t CUpti_ActivityKernel4::queued [inherited]
+
+
+

The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that + the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

Command buffer is a buffer written by CUDA driver to send commands like kernel launch, memory copy, etc. to the GPU. All launches + of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command + buffer, then returns without checking the GPU's progress. +

+
+
+
+ uint16_t CUpti_ActivityKernel4::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel4::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel4::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel4::sharedMemoryCarveoutRequested [inherited]
+
+
+

Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only + if field isSharedMemoryCarveoutRequested is set. +

+
+
+
+ uint8_t CUpti_ActivityKernel4::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint32_t CUpti_ActivityKernel4::sharedMemoryExecuted [inherited]
+
+
+

Shared memory size set by the driver.

+
+
+
+ uint64_t CUpti_ActivityKernel4::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel4::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel4::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+ uint64_t CUpti_ActivityKernel4::submitted [inherited]
+
+
+

The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN + indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API + cuptiActivityEnableLatencyTimestamps() to enable collection. +

+
+
+
+
+
+
+

6.40. CUpti_ActivityKernel5 Struct Reference

+

[CUPTI Activity API] +

+
+

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
CUpti_ActivityKernel5::@11  cacheConfig
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
uint8_t  isSharedMemoryCarveoutRequested
+
+
CUpti_ActivityKind kind
+
+
uint8_t  launchType
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
uint8_t  padding
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
+
+
uint64_t  queued
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryCarveoutRequested
+
+
uint8_t  sharedMemoryConfig
+
+
uint32_t  sharedMemoryExecuted
+
+
CUpti_FuncShmemLimitConfig shmemLimitConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
uint64_t  submitted
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel5::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel5::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel5::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ CUpti_ActivityKernel5::@11 CUpti_ActivityKernel5::cacheConfig [inherited]
+
+
+

For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested + is set +

+
+
+
+ uint64_t CUpti_ActivityKernel5::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel5::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel5::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel5::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel5::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel5::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel5::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint32_t CUpti_ActivityKernel5::graphId [inherited]
+
+
+

The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not + launched through graph launch APIs. +

+
+
+
+ uint64_t CUpti_ActivityKernel5::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is + not launched through graph launch APIs. +

+
+
+
+ int64_t CUpti_ActivityKernel5::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel5::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel5::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel5::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel5::isSharedMemoryCarveoutRequested [inherited]
+
+
+

This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch

+
+
+
+ CUpti_ActivityKind CUpti_ActivityKernel5::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint8_t CUpti_ActivityKernel5::launchType [inherited]
+
+
+

This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

+

See also:

+

CUpti_ActivityLaunchType

+

+
+
+
+ uint32_t CUpti_ActivityKernel5::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel5::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel5::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ uint8_t CUpti_ActivityKernel5::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfig CUpti_ActivityKernel5::partitionedGlobalCacheExecuted [inherited]
+
+
+

The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy + requirement of the launch cannot support caching. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfig CUpti_ActivityKernel5::partitionedGlobalCacheRequested [inherited]
+
+
+

The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. +

+
+
+
+ uint64_t CUpti_ActivityKernel5::queued [inherited]
+
+
+

The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that + the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

Command buffer is a buffer written by CUDA driver to send commands like kernel launch, memory copy, etc. to the GPU. All launches + of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command + buffer, then returns without checking the GPU's progress. +

+
+
+
+ uint16_t CUpti_ActivityKernel5::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel5::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel5::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel5::sharedMemoryCarveoutRequested [inherited]
+
+
+

Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only + if field isSharedMemoryCarveoutRequested is set. +

+
+
+
+ uint8_t CUpti_ActivityKernel5::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint32_t CUpti_ActivityKernel5::sharedMemoryExecuted [inherited]
+
+
+

Shared memory size set by the driver.

+
+
+
+ CUpti_FuncShmemLimitConfig CUpti_ActivityKernel5::shmemLimitConfig [inherited]
+
+
+

The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic + shared memory. +

+
+
+
+ uint64_t CUpti_ActivityKernel5::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel5::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel5::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+ uint64_t CUpti_ActivityKernel5::submitted [inherited]
+
+
+

The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN + indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API + cuptiActivityEnableLatencyTimestamps() to enable collection. +

+
+
+
+
+
+
+

6.41. CUpti_ActivityKernel6 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) + but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
CUpti_ActivityKernel6::@13  cacheConfig
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
uint8_t  isSharedMemoryCarveoutRequested
+
+
CUpti_ActivityKind kind
+
+
uint8_t  launchType
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
+ CUaccessPolicyWindow + * pAccessPolicyWindow
+
+
uint8_t  padding
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
+
+
uint64_t  queued
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryCarveoutRequested
+
+
uint8_t  sharedMemoryConfig
+
+
uint32_t  sharedMemoryExecuted
+
+
CUpti_FuncShmemLimitConfig shmemLimitConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
uint64_t  submitted
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel6::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel6::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel6::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ CUpti_ActivityKernel6::@13 CUpti_ActivityKernel6::cacheConfig [inherited]
+
+
+

For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested + is set +

+
+
+
+ uint64_t CUpti_ActivityKernel6::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel6::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel6::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel6::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel6::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel6::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel6::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint32_t CUpti_ActivityKernel6::graphId [inherited]
+
+
+

The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not + launched through graph launch APIs. +

+
+
+
+ uint64_t CUpti_ActivityKernel6::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is + not launched through graph launch APIs. +

+
+
+
+ int64_t CUpti_ActivityKernel6::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel6::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel6::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel6::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel6::isSharedMemoryCarveoutRequested [inherited]
+
+
+

This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch

+
+
+
+ CUpti_ActivityKindCUpti_ActivityKernel6::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint8_t CUpti_ActivityKernel6::launchType [inherited]
+
+
+

This indicates if the kernel was executed via a regular launch or via a single/multi device cooperative launch.

+

See also:

+

CUpti_ActivityLaunchType

+

+
+
+
+ uint32_t CUpti_ActivityKernel6::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel6::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel6::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ + + CUaccessPolicyWindow + * CUpti_ActivityKernel6::pAccessPolicyWindow [inherited]
+
+
+

The pointer to the access policy window. The structure CUaccessPolicyWindow is defined in cuda.h.

+
+
+
+ uint8_t CUpti_ActivityKernel6::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfigCUpti_ActivityKernel6::partitionedGlobalCacheExecuted [inherited]
+
+
+

The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy + requirement of the launch cannot support caching. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfigCUpti_ActivityKernel6::partitionedGlobalCacheRequested [inherited]
+
+
+

The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. +

+
+
+
+ uint64_t CUpti_ActivityKernel6::queued [inherited]
+
+
+

The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that + the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

A command buffer is a buffer written by the CUDA driver to send commands like kernel launch, memory copy etc. to the GPU. All launches + of CUDA kernels are asynchronous with respect to the host; the host requests the launch by writing commands into the command + buffer, then returns without checking the GPU's progress. +

+
+
+
+ uint16_t CUpti_ActivityKernel6::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel6::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel6::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel6::sharedMemoryCarveoutRequested [inherited]
+
+
+

Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only + if field isSharedMemoryCarveoutRequested is set. +

+
+
+
+ uint8_t CUpti_ActivityKernel6::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint32_t CUpti_ActivityKernel6::sharedMemoryExecuted [inherited]
+
+
+

Shared memory size set by the driver.

+
+
+
+ CUpti_FuncShmemLimitConfigCUpti_ActivityKernel6::shmemLimitConfig [inherited]
+
+
+

The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic + shared memory. +

+
+
+
+ uint64_t CUpti_ActivityKernel6::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel6::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel6::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+ uint64_t CUpti_ActivityKernel6::submitted [inherited]
+
+
+

The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN + indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API + cuptiActivityEnableLatencyTimestamps() to enable collection. +

+
+
+
+
+
+
+

6.42. CUpti_ActivityKernel7 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) + but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
CUpti_ActivityKernel7::@15  cacheConfig
+
+
uint32_t  channelID
+
+
CUpti_ChannelType  channelType
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
uint8_t  isSharedMemoryCarveoutRequested
+
+
CUpti_ActivityKind kind
+
+
uint8_t  launchType
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
const + char + * name
+
+
+ CUaccessPolicyWindow + * pAccessPolicyWindow
+
+
uint8_t  padding
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
+
+
uint64_t  queued
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryCarveoutRequested
+
+
uint8_t  sharedMemoryConfig
+
+
uint32_t  sharedMemoryExecuted
+
+
CUpti_FuncShmemLimitConfig shmemLimitConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
uint64_t  submitted
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel7::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel7::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel7::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ CUpti_ActivityKernel7::@15 CUpti_ActivityKernel7::cacheConfig [inherited]
+
+
+

For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested + is set +

+
+
+
+ uint32_t CUpti_ActivityKernel7::channelID [inherited]
+
+
+

The ID of the HW channel on which the kernel is launched.

+
+
+
+ CUpti_ChannelType CUpti_ActivityKernel7::channelType [inherited]
+
+
+

The type of the channel

+
+
+
+ uint64_t CUpti_ActivityKernel7::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel7::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel7::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel7::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel7::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel7::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel7::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint32_t CUpti_ActivityKernel7::graphId [inherited]
+
+
+

The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not + launched through graph launch APIs. +

+
+
+
+ uint64_t CUpti_ActivityKernel7::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is + not launched through graph launch APIs. +

+
+
+
+ int64_t CUpti_ActivityKernel7::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel7::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel7::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel7::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel7::isSharedMemoryCarveoutRequested [inherited]
+
+
+

This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch

+
+
+
+ CUpti_ActivityKindCUpti_ActivityKernel7::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint8_t CUpti_ActivityKernel7::launchType [inherited]
+
+
+

This indicates if the kernel was executed via a regular launch or via a single/multi device cooperative launch.

+

See also:

+

CUpti_ActivityLaunchType

+

+
+
+
+ uint32_t CUpti_ActivityKernel7::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel7::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel7::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ + + CUaccessPolicyWindow + * CUpti_ActivityKernel7::pAccessPolicyWindow [inherited]
+
+
+

The pointer to the access policy window. The structure CUaccessPolicyWindow is defined in cuda.h.

+
+
+
+ uint8_t CUpti_ActivityKernel7::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfigCUpti_ActivityKernel7::partitionedGlobalCacheExecuted [inherited]
+
+
+

The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy + requirement of the launch cannot support caching. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfigCUpti_ActivityKernel7::partitionedGlobalCacheRequested [inherited]
+
+
+

The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. +

+
+
+
+ uint64_t CUpti_ActivityKernel7::queued [inherited]
+
+
+

The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that + the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

A command buffer is a buffer written by the CUDA driver to send commands like kernel launch, memory copy etc. to the GPU. All launches + of CUDA kernels are asynchronous with respect to the host; the host requests the launch by writing commands into the command + buffer, then returns without checking the GPU's progress. +

+
+
+
+ uint16_t CUpti_ActivityKernel7::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel7::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel7::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel7::sharedMemoryCarveoutRequested [inherited]
+
+
+

Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only + if field isSharedMemoryCarveoutRequested is set. +

+
+
+
+ uint8_t CUpti_ActivityKernel7::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint32_t CUpti_ActivityKernel7::sharedMemoryExecuted [inherited]
+
+
+

Shared memory size set by the driver.

+
+
+
+ CUpti_FuncShmemLimitConfigCUpti_ActivityKernel7::shmemLimitConfig [inherited]
+
+
+

The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic + shared memory. +

+
+
+
+ uint64_t CUpti_ActivityKernel7::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel7::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel7::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+ uint64_t CUpti_ActivityKernel7::submitted [inherited]
+
+
+

The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN + indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API + cuptiActivityEnableLatencyTimestamps() to enable collection. +

+
+
+
+
+
+
+

6.43. CUpti_ActivityKernel8 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) + +

+
+

Public Variables

+
+
int32_t  blockX
+
+
int32_t  blockY
+
+
int32_t  blockZ
+
+
CUpti_ActivityKernel8::@17  cacheConfig
+
+
uint32_t  channelID
+
+
CUpti_ChannelType  channelType
+
+
uint32_t  clusterSchedulingPolicy
+
+
uint32_t  clusterX
+
+
uint32_t  clusterY
+
+
uint32_t  clusterZ
+
+
uint64_t  completed
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
int32_t  dynamicSharedMemory
+
+
uint64_t  end
+
+
uint8_t  executed
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
int64_t  gridId
+
+
int32_t  gridX
+
+
int32_t  gridY
+
+
int32_t  gridZ
+
+
uint8_t  isSharedMemoryCarveoutRequested
+
+
CUpti_ActivityKind kind
+
+
uint8_t  launchType
+
+
uint32_t  localMemoryPerThread
+
+
uint32_t  localMemoryTotal
+
+
uint64_t  localMemoryTotal_v2
+
+
const + char + * name
+
+
+ CUaccessPolicyWindow + * pAccessPolicyWindow
+
+
uint8_t  padding
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
+
+
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
+
+
uint64_t  queued
+
+
uint16_t  registersPerThread
+
+
uint8_t  requested
+
+
+ void + * reserved0
+
+
uint8_t  sharedMemoryCarveoutRequested
+
+
uint8_t  sharedMemoryConfig
+
+
uint32_t  sharedMemoryExecuted
+
+
CUpti_FuncShmemLimitConfig shmemLimitConfig
+
+
uint64_t  start
+
+
int32_t  staticSharedMemory
+
+
uint32_t  streamId
+
+
uint64_t  submitted
+
+
+
+

Variables

+
+
+ int32_t CUpti_ActivityKernel8::blockX [inherited]
+
+
+

The X-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel8::blockY [inherited]
+
+
+

The Y-dimension block size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel8::blockZ [inherited]
+
+
+

The Z-dimension block size for the kernel.

+
+
+
+ CUpti_ActivityKernel8::@17 CUpti_ActivityKernel8::cacheConfig [inherited]
+
+
+

For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested + is set +

+
+
+
+ uint32_t CUpti_ActivityKernel8::channelID [inherited]
+
+
+

The ID of the HW channel on which the kernel is launched.

+
+
+
+ CUpti_ChannelType CUpti_ActivityKernel8::channelType [inherited]
+
+
+

The type of the channel

+
+
+
+ uint32_t CUpti_ActivityKernel8::clusterSchedulingPolicy [inherited]
+
+
+

The cluster scheduling policy for the kernel. Refer to CUclusterSchedulingPolicy. Field is valid for devices with compute capability + 9.0 and higher. +

+
+
+
+ uint32_t CUpti_ActivityKernel8::clusterX [inherited]
+
+
+

The X-dimension cluster size for the kernel. Field is valid for devices with compute capability 9.0 and higher

+
+
+
+ uint32_t CUpti_ActivityKernel8::clusterY [inherited]
+
+
+

The Y-dimension cluster size for the kernel. Field is valid for devices with compute capability 9.0 and higher

+
+
+
+ uint32_t CUpti_ActivityKernel8::clusterZ [inherited]
+
+
+

The Z-dimension cluster size for the kernel. Field is valid for devices with compute capability 9.0 and higher

+
+
+
+ uint64_t CUpti_ActivityKernel8::completed [inherited]
+
+
+

The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel + itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+
+
+
+ uint32_t CUpti_ActivityKernel8::contextId [inherited]
+
+
+

The ID of the context where the kernel is executing.

+
+
+
+ uint32_t CUpti_ActivityKernel8::correlationId [inherited]
+
+
+

The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation + ID in the driver or runtime API activity record that launched the kernel. +

+
+
+
+ uint32_t CUpti_ActivityKernel8::deviceId [inherited]
+
+
+

The ID of the device where the kernel is executing.

+
+
+
+ int32_t CUpti_ActivityKernel8::dynamicSharedMemory [inherited]
+
+
+

The dynamic shared memory reserved for the kernel, in bytes.

+
+
+
+ uint64_t CUpti_ActivityKernel8::end [inherited]
+
+
+

The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ uint8_t CUpti_ActivityKernel8::executed [inherited]
+
+
+

The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ uint32_t CUpti_ActivityKernel8::graphId [inherited]
+
+
+

The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not + launched through graph launch APIs. +

+
+
+
+ uint64_t CUpti_ActivityKernel8::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is + not launched through graph launch APIs. +

+
+
+
+ int64_t CUpti_ActivityKernel8::gridId [inherited]
+
+
+

The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime.

+
+
+
+ int32_t CUpti_ActivityKernel8::gridX [inherited]
+
+
+

The X-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel8::gridY [inherited]
+
+
+

The Y-dimension grid size for the kernel.

+
+
+
+ int32_t CUpti_ActivityKernel8::gridZ [inherited]
+
+
+

The Z-dimension grid size for the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel8::isSharedMemoryCarveoutRequested [inherited]
+
+
+

This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch

+
+
+
+ CUpti_ActivityKindCUpti_ActivityKernel8::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL.

+
+
+
+ uint8_t CUpti_ActivityKernel8::launchType [inherited]
+
+
+

This indicates if the kernel was executed via a regular launch or via a single/multi device cooperative launch.

+

See also:

+

CUpti_ActivityLaunchType

+

+
+
+
+ uint32_t CUpti_ActivityKernel8::localMemoryPerThread [inherited]
+
+
+

The amount of local memory reserved for each thread, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel8::localMemoryTotal [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes (deprecated in CUDA 11.8). Refer to field localMemoryTotal_v2. + +

+
+
+
+ uint64_t CUpti_ActivityKernel8::localMemoryTotal_v2 [inherited]
+
+
+

The total amount of local memory reserved for the kernel, in bytes.

+
+
+
+ const + + char + * CUpti_ActivityKernel8::name [inherited]
+
+
+

The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be + modified. +

+
+
+
+ + + CUaccessPolicyWindow + * CUpti_ActivityKernel8::pAccessPolicyWindow [inherited]
+
+
+

The pointer to the access policy window. The structure CUaccessPolicyWindow is defined in cuda.h.

+
+
+
+ uint8_t CUpti_ActivityKernel8::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfigCUpti_ActivityKernel8::partitionedGlobalCacheExecuted [inherited]
+
+
+

The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy + requirement of the launch cannot support caching. +

+
+
+
+ CUpti_ActivityPartitionedGlobalCacheConfigCUpti_ActivityKernel8::partitionedGlobalCacheRequested [inherited]
+
+
+

The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain + chips, such as devices with compute capability 5.2. +

+
+
+
+ uint64_t CUpti_ActivityKernel8::queued [inherited]
+
+
+

The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that + the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

A command buffer is a buffer written by the CUDA driver to send commands like kernel launch, memory copy etc. to the GPU. All launches + of CUDA kernels are asynchronous with respect to the host; the host requests the launch by writing commands into the command + buffer, then returns without checking the GPU's progress. +

+
+
+
+ uint16_t CUpti_ActivityKernel8::registersPerThread [inherited]
+
+
+

The number of registers required for each thread executing the kernel.

+
+
+
+ uint8_t CUpti_ActivityKernel8::requested [inherited]
+
+
+

The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h.

+
+
+
+ + + void + * CUpti_ActivityKernel8::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint8_t CUpti_ActivityKernel8::sharedMemoryCarveoutRequested [inherited]
+
+
+

Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only + if field isSharedMemoryCarveoutRequested is set. +

+
+
+
+ uint8_t CUpti_ActivityKernel8::sharedMemoryConfig [inherited]
+
+
+

The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. + +

+
+
+
+ uint32_t CUpti_ActivityKernel8::sharedMemoryExecuted [inherited]
+
+
+

Shared memory size set by the driver.

+
+
+
+ CUpti_FuncShmemLimitConfigCUpti_ActivityKernel8::shmemLimitConfig [inherited]
+
+
+

The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic + shared memory. +

+
+
+
+ uint64_t CUpti_ActivityKernel8::start [inherited]
+
+
+

The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the kernel. +

+
+
+
+ int32_t CUpti_ActivityKernel8::staticSharedMemory [inherited]
+
+
+

The static shared memory allocated for the kernel, in bytes.

+
+
+
+ uint32_t CUpti_ActivityKernel8::streamId [inherited]
+
+
+

The ID of the stream where the kernel is executing.

+
+
+
+ uint64_t CUpti_ActivityKernel8::submitted [inherited]
+
+
+

The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN + indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API + cuptiActivityEnableLatencyTimestamps() to enable collection. +

+
+
+
+
+
+
+

6.44. CUpti_ActivityMarker Struct Reference

+

[CUPTI Activity API] +

+
+

The marker is specified with a descriptive name and unique id (CUPTI_ACTIVITY_KIND_MARKER). Marker activity is now reported + using the CUpti_ActivityMarker2 activity record. +

+
+

Public Variables

+
+
CUpti_ActivityFlag flags
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
const + char + * name
+
+
union CUpti_ActivityObjectKindId objectId
+
+
CUpti_ActivityObjectKind objectKind
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ CUpti_ActivityFlagCUpti_ActivityMarker::flags [inherited]
+
+
+

The flags associated with the marker.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMarker::id [inherited]
+
+
+

The marker ID.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityMarker::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MARKER.

+
+
+
+ const + + char + * CUpti_ActivityMarker::name [inherited]
+
+
+

The marker name for an instantaneous or start marker. This will be NULL for an end marker.

+
+
+
+ union CUpti_ActivityObjectKindIdCUpti_ActivityMarker::objectId [inherited]
+
+
+

The identifier for the activity object associated with this marker. 'objectKind' indicates which ID is valid for this record. + +

+
+
+
+ CUpti_ActivityObjectKindCUpti_ActivityMarker::objectKind [inherited]
+
+
+

The kind of activity object associated with this marker.

+
+
+
+ uint64_t CUpti_ActivityMarker::timestamp [inherited]
+
+
+

The timestamp for the marker, in ns. A value of 0 indicates that timestamp information could not be collected for the marker. + +

+
+
+
+
+
+
+

6.45. CUpti_ActivityMarker2 Struct Reference

+

[CUPTI Activity API] +

+
+

The marker is specified with a descriptive name and unique id (CUPTI_ACTIVITY_KIND_MARKER).

+
+

Public Variables

+
+
const + char + * domain
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
const + char + * name
+
+
union CUpti_ActivityObjectKindId objectId
+
+
CUpti_ActivityObjectKind objectKind
+
+
uint32_t  pad
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ const + + char + * CUpti_ActivityMarker2::domain [inherited]
+
+
+

The name of the domain to which this marker belongs. This will be NULL for the default domain.

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityMarker2::flags [inherited]
+
+
+

The flags associated with the marker.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMarker2::id [inherited]
+
+
+

The marker ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMarker2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MARKER.

+
+
+
+ const + + char + * CUpti_ActivityMarker2::name [inherited]
+
+
+

The marker name for an instantaneous or start marker. This will be NULL for an end marker.

+
+
+
+ union CUpti_ActivityObjectKindId CUpti_ActivityMarker2::objectId [inherited]
+
+
+

The identifier for the activity object associated with this marker. 'objectKind' indicates which ID is valid for this record. + +

+
+
+
+ CUpti_ActivityObjectKind CUpti_ActivityMarker2::objectKind [inherited]
+
+
+

The kind of activity object associated with this marker.

+
+
+
+ uint32_t CUpti_ActivityMarker2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityMarker2::timestamp [inherited]
+
+
+

The timestamp for the marker, in ns. A value of 0 indicates that timestamp information could not be collected for the marker. + +

+
+
+
+
+
+
+

6.46. CUpti_ActivityMarkerData Struct Reference

+

[CUPTI Activity API] +

+
+

The marker data contains color, payload, and category. (CUPTI_ACTIVITY_KIND_MARKER_DATA).

+
+

Public Variables

+
+
uint32_t  category
+
+
uint32_t  color
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
union CUpti_MetricValue payload
+
+
CUpti_MetricValueKind payloadKind
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityMarkerData::category [inherited]
+
+
+

The category for the marker.

+
+
+
+ uint32_t CUpti_ActivityMarkerData::color [inherited]
+
+
+

The color for the marker.

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityMarkerData::flags [inherited]
+
+
+

The flags associated with the marker.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMarkerData::id [inherited]
+
+
+

The marker ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMarkerData::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MARKER_DATA.

+
+
+
+ union CUpti_MetricValue CUpti_ActivityMarkerData::payload [inherited]
+
+
+

The payload value.

+
+
+
+ CUpti_MetricValueKind CUpti_ActivityMarkerData::payloadKind [inherited]
+
+
+

Defines the payload format for the value associated with the marker.

+
+
+
+
+
+
+

6.47. CUpti_ActivityMemcpy Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
CUpti_ActivityKind kind
+
+
+ void + * reserved0
+
+
uint32_t  runtimeCorrelationId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpy::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpy::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpy::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpy::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY.

+
+
+
+ + + void + * CUpti_ActivityMemcpy::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpy::runtimeCorrelationId [inherited]
+
+
+

The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical + to the correlation ID in the runtime API activity record that launched the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.48. CUpti_ActivityMemcpy3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
+ void + * reserved0
+
+
uint32_t  runtimeCorrelationId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpy3::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpy3::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy3::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpy3::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy3::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy3::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy3::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy3::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy3::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed this memcpy through graph launch. This field will be 0 if the memcpy is not + done through graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpy3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY.

+
+
+
+ + + void + * CUpti_ActivityMemcpy3::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpy3::runtimeCorrelationId [inherited]
+
+
+

The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical + to the correlation ID in the runtime API activity record that launched the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy3::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy3::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy3::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.49. CUpti_ActivityMemcpy4 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  padding
+
+
+ void + * reserved0
+
+
uint32_t  runtimeCorrelationId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpy4::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy4::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy4::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy4::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy4::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::graphId [inherited]
+
+
+

The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done + through graph launch. +

+
+
+
+ uint64_t CUpti_ActivityMemcpy4::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed this memcpy through graph launch. This field will be 0 if the memcpy is not + done through graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpy4::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY.

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemcpy4::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::runtimeCorrelationId [inherited]
+
+
+

The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical + to the correlation ID in the runtime API activity record that launched the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy4::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy4::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy4::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.50. CUpti_ActivityMemcpy5 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  channelID
+
+
CUpti_ChannelType  channelType
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad2
+
+
+ void + * reserved0
+
+
uint32_t  runtimeCorrelationId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpy5::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::channelID [inherited]
+
+
+

The ID of the HW channel on which the memory copy is occurring.

+
+
+
+ CUpti_ChannelType CUpti_ActivityMemcpy5::channelType [inherited]
+
+
+

The type of the channel.

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy5::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpy5::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy5::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy5::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::graphId [inherited]
+
+
+

The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done + through graph launch. +

+
+
+
+ uint64_t CUpti_ActivityMemcpy5::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed this memcpy through graph launch. This field will be 0 if the memcpy is not + done through graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpy5::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY.

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::pad2 [inherited]
+
+
+

Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemcpy5::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::runtimeCorrelationId [inherited]
+
+
+

The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical + to the correlation ID in the runtime API activity record that launched the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpy5::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpy5::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpy5::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.51. CUpti_ActivityMemcpyPtoP Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2) but is no longer generated by CUPTI. + Peer-to-peer memory copy activities are now reported using the CUpti_ActivityMemcpyPtoP2 activity record. +

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint32_t  dstContextId
+
+
uint32_t  dstDeviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
+ void + * reserved0
+
+
uint32_t  srcContextId
+
+
uint32_t  srcDeviceId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpyPtoP::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver and runtime API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::dstContextId [inherited]
+
+
+

The ID of the context owning the memory being copied to.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::dstDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied to.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpyPtoP::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemcpyPtoP::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::srcContextId [inherited]
+
+
+

The ID of the context owning the memory being copied from.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::srcDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied from.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.52. CUpti_ActivityMemcpyPtoP2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint32_t  dstContextId
+
+
uint32_t  dstDeviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
+ void + * reserved0
+
+
uint32_t  srcContextId
+
+
uint32_t  srcDeviceId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpyPtoP2::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP2::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver and runtime API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::dstContextId [inherited]
+
+
+

The ID of the context owning the memory being copied to.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::dstDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied to.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP2::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP2::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP2::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP2::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed the memcpy through graph launch. This field will be 0 if memcpy is not done + using graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpyPtoP2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemcpyPtoP2::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::srcContextId [inherited]
+
+
+

The ID of the context owning the memory being copied from.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::srcDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied from.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP2::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP2::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP2::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.53. CUpti_ActivityMemcpyPtoP3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint32_t  dstContextId
+
+
uint32_t  dstDeviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  padding
+
+
+ void + * reserved0
+
+
uint32_t  srcContextId
+
+
uint32_t  srcDeviceId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpyPtoP3::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP3::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver and runtime API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::dstContextId [inherited]
+
+
+

The ID of the context owning the memory being copied to.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::dstDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied to.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP3::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP3::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP3::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::graphId [inherited]
+
+
+

The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done + through graph launch. +

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP3::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed the memcpy through graph launch. This field will be 0 if memcpy is not done + using graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpyPtoP3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemcpyPtoP3::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::srcContextId [inherited]
+
+
+

The ID of the context owning the memory being copied from.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::srcDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied from.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP3::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP3::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP3::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.54. CUpti_ActivityMemcpyPtoP4 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  channelID
+
+
CUpti_ChannelType  channelType
+
+
uint32_t  contextId
+
+
uint8_t  copyKind
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint32_t  dstContextId
+
+
uint32_t  dstDeviceId
+
+
uint8_t  dstKind
+
+
uint64_t  end
+
+
uint8_t  flags
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
+ void + * reserved0
+
+
uint32_t  srcContextId
+
+
uint32_t  srcDeviceId
+
+
uint8_t  srcKind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemcpyPtoP4::bytes [inherited]
+
+
+

The number of bytes transferred by the memory copy.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::channelID [inherited]
+
+
+

The ID of the HW channel on which the memory copy is occurring.

+
+
+
+ CUpti_ChannelType CUpti_ActivityMemcpyPtoP4::channelType [inherited]
+
+
+

The type of the channel.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::contextId [inherited]
+
+
+

The ID of the context where the memory copy is occurring.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP4::copyKind [inherited]
+
+
+

The kind of the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemcpyKind

+

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::correlationId [inherited]
+
+
+

The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation + ID in the driver and runtime API activity record that launched the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::deviceId [inherited]
+
+
+

The ID of the device where the memory copy is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::dstContextId [inherited]
+
+
+

The ID of the context owning the memory being copied to.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::dstDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied to.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP4::dstKind [inherited]
+
+
+

The destination memory kind written by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP4::end [inherited]
+
+
+

The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP4::flags [inherited]
+
+
+

The flags associated with the memory copy.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::graphId [inherited]
+
+
+

The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done + through graph launch. +

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP4::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed the memcpy through graph launch. This field will be 0 if memcpy is not done + using graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemcpyPtoP4::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemcpyPtoP4::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::srcContextId [inherited]
+
+
+

The ID of the context owning the memory being copied from.

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::srcDeviceId [inherited]
+
+
+

The ID of the device where memory is being copied from.

+
+
+
+ uint8_t CUpti_ActivityMemcpyPtoP4::srcKind [inherited]
+
+
+

The source memory kind read by the memory copy, stored as a byte to reduce record size.

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint64_t CUpti_ActivityMemcpyPtoP4::start [inherited]
+
+
+

The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory copy. +

+
+
+
+ uint32_t CUpti_ActivityMemcpyPtoP4::streamId [inherited]
+
+
+

The ID of the stream where the memory copy is occurring.

+
+
+
+
+
+
+

6.55. CUpti_ActivityMemory Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory allocation and free operation (CUPTI_ACTIVITY_KIND_MEMORY). This activity record + provides a single record for the memory allocation and memory release operations. +

+

Note: It is recommended to move to the new activity record CUpti_ActivityMemory3 enabled using the kind CUPTI_ACTIVITY_KIND_MEMORY2. CUpti_ActivityMemory3 provides separate records for memory allocation and memory release operations. This allows the corresponding + driver and runtime API activity record to be correlated with the memory operation. +

+
+

Public Variables

+
+
uint64_t  address
+
+
uint64_t  allocPC
+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
uint64_t  freePC
+
+
CUpti_ActivityKind kind
+
+
CUpti_ActivityMemoryKind memoryKind
+
+
const + char + * name
+
+
uint32_t  processId
+
+
uint64_t  start
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemory::address [inherited]
+
+
+

The virtual address of the allocation

+
+
+
+ uint64_t CUpti_ActivityMemory::allocPC [inherited]
+
+
+

The program counter of the allocation of memory

+
+
+
+ uint64_t CUpti_ActivityMemory::bytes [inherited]
+
+
+

The number of bytes of memory allocated.

+
+
+
+ uint32_t CUpti_ActivityMemory::contextId [inherited]
+
+
+

The ID of the context. If context is NULL, contextId is set to CUPTI_INVALID_CONTEXT_ID. +

+
+
+
+ uint32_t CUpti_ActivityMemory::deviceId [inherited]
+
+
+

The ID of the device where the memory allocation is taking place.

+
+
+
+ uint64_t CUpti_ActivityMemory::end [inherited]
+
+
+

The end timestamp for the memory operation, i.e. the time when memory was freed, in ns. This will be 0 if memory is not freed in the application.

+
+
+
+ uint64_t CUpti_ActivityMemory::freePC [inherited]
+
+
+

The program counter of the freeing of memory. This will be 0 if memory is not freed in the application.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemory::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY

+
+
+
+ CUpti_ActivityMemoryKind CUpti_ActivityMemory::memoryKind [inherited]
+
+
+

The memory kind requested by the user

+
+
+
+ const + + char + * CUpti_ActivityMemory::name [inherited]
+
+
+

Variable name. This name is shared across all activity records representing the same symbol, and so should not be modified. + +

+
+
+
+ uint32_t CUpti_ActivityMemory::processId [inherited]
+
+
+

The ID of the process to which this record belongs.

+
+
+
+ uint64_t CUpti_ActivityMemory::start [inherited]
+
+
+

The start timestamp for the memory operation, i.e. the time when memory was allocated, in ns.

+
+
+
+
+
+
+

6.56. CUpti_ActivityMemory2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory allocation and free operation (CUPTI_ACTIVITY_KIND_MEMORY2). This activity record provides separate records for memory allocation and memory release operations. This allows the corresponding driver and runtime API activity record to be correlated with the memory operation.

+

Note: This activity record is an upgrade over CUpti_ActivityMemory enabled using the kind CUPTI_ACTIVITY_KIND_MEMORY. CUpti_ActivityMemory provides a single record for the memory allocation and memory release operations. +

+
+

Public Variables

+
+
uint64_t  PC
+
+
uint64_t  address
+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint32_t  isAsync
+
+
CUpti_ActivityKind kind
+
+
CUpti_ActivityMemoryKind memoryKind
+
+
CUpti_ActivityMemoryOperationType memoryOperationType
+
+
CUpti_ActivityMemory2::@2  memoryPoolConfig
+
+
CUpti_ActivityMemoryPoolType memoryPoolType
+
+
const + char + * name
+
+
uint64_t  processId
+
+
uint32_t  processId
+
+
uint64_t  releaseThreshold
+
+
uint64_t  size
+
+
uint32_t  streamId
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemory2::PC [inherited]
+
+
+

The program counter of the memory operation.

+
+
+
+ uint64_t CUpti_ActivityMemory2::address [inherited]
+
+
+

The virtual address of the allocation.

+

The base address of the memory pool.

+
+
+
+ uint64_t CUpti_ActivityMemory2::bytes [inherited]
+
+
+

The number of bytes of memory allocated.

+
+
+
+ uint32_t CUpti_ActivityMemory2::contextId [inherited]
+
+
+

The ID of the context. If context is NULL, contextId is set to CUPTI_INVALID_CONTEXT_ID. +

+
+
+
+ uint32_t CUpti_ActivityMemory2::correlationId [inherited]
+
+
+

The correlation ID of the memory operation. Each memory operation is assigned a unique correlation ID that is identical to + the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+
+
+
+ uint32_t CUpti_ActivityMemory2::deviceId [inherited]
+
+
+

The ID of the device where the memory operation is taking place.

+
+
+
+ uint32_t CUpti_ActivityMemory2::isAsync [inherited]
+
+
+

isAsync is set if memory operation happens through async memory APIs. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemory2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY2

+
+
+
+ CUpti_ActivityMemoryKind CUpti_ActivityMemory2::memoryKind [inherited]
+
+
+

The memory kind requested by the user, CUpti_ActivityMemoryKind. +

+
+
+
+ CUpti_ActivityMemoryOperationType CUpti_ActivityMemory2::memoryOperationType [inherited]
+
+
+

The memory operation requested by the user, CUpti_ActivityMemoryOperationType. +

+
+
+
+ CUpti_ActivityMemory2::@2 CUpti_ActivityMemory2::memoryPoolConfig [inherited]
+
+
+

The memory pool configuration used for the memory operations.

+
+
+
+ CUpti_ActivityMemoryPoolType CUpti_ActivityMemory2::memoryPoolType [inherited]
+
+
+

The type of the memory pool, CUpti_ActivityMemoryPoolType

+
+
+
+ const + + char + * CUpti_ActivityMemory2::name [inherited]
+
+
+

Variable name. This name is shared across all activity records representing the same symbol, and so should not be modified. + +

+
+
+
+ uint64_t CUpti_ActivityMemory2::processId [inherited]
+
+
+

The processId of the memory pool. processId is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint32_t CUpti_ActivityMemory2::processId [inherited]
+
+
+

The ID of the process to which this record belongs.

+
+
+
+ uint64_t CUpti_ActivityMemory2::releaseThreshold [inherited]
+
+
+

The release threshold of the memory pool in bytes. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemory2::size [inherited]
+
+
+

The size of the memory pool in bytes. size is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint32_t CUpti_ActivityMemory2::streamId [inherited]
+
+
+

The ID of the stream. If memory operation is not async, streamId is set to CUPTI_INVALID_STREAM_ID. +

+
+
+
+ uint64_t CUpti_ActivityMemory2::timestamp [inherited]
+
+
+

The start timestamp for the memory operation, in ns.

+
+
+
+
+
+
+

6.57. CUpti_ActivityMemory3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory allocation and free operation (CUPTI_ACTIVITY_KIND_MEMORY2). This activity record provides separate records for memory allocation and memory release operations. This allows the corresponding driver and runtime API activity record to be correlated with the memory operation.

+

Note: This activity record is an upgrade over CUpti_ActivityMemory enabled using the kind CUPTI_ACTIVITY_KIND_MEMORY. CUpti_ActivityMemory provides a single record for the memory allocation and memory release operations. +

+
+

Public Inner Classes

+
+
struct  CUpti_ActivityMemory3::PACKED_ALIGNMENT
+
+
+

Public Variables

+
+
uint64_t  PC
+
+
uint64_t  address
+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint32_t  isAsync
+
+
CUpti_ActivityKind kind
+
+
CUpti_ActivityMemoryKind memoryKind
+
+
CUpti_ActivityMemoryOperationType memoryOperationType
+
+
struct CUpti_ActivityMemory3::​PACKED_ALIGNMENT memoryPoolConfig
+
+
const + char + * name
+
+
uint32_t  processId
+
+
uint32_t  streamId
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemory3::PC [inherited]
+
+
+

The program counter of the memory operation.

+
+
+
+ uint64_t CUpti_ActivityMemory3::address [inherited]
+
+
+

The virtual address of the allocation.

+
+
+
+ uint64_t CUpti_ActivityMemory3::bytes [inherited]
+
+
+

The number of bytes of memory allocated.

+
+
+
+ uint32_t CUpti_ActivityMemory3::contextId [inherited]
+
+
+

The ID of the context. If context is NULL, contextId is set to CUPTI_INVALID_CONTEXT_ID. +

+
+
+
+ uint32_t CUpti_ActivityMemory3::correlationId [inherited]
+
+
+

The correlation ID of the memory operation. Each memory operation is assigned a unique correlation ID that is identical to + the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+
+
+
+ uint32_t CUpti_ActivityMemory3::deviceId [inherited]
+
+
+

The ID of the device where the memory operation is taking place.

+
+
+
+ uint32_t CUpti_ActivityMemory3::isAsync [inherited]
+
+
+

isAsync is set if memory operation happens through async memory APIs. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemory3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY2

+
+
+
+ CUpti_ActivityMemoryKind CUpti_ActivityMemory3::memoryKind [inherited]
+
+
+

The memory kind requested by the user, CUpti_ActivityMemoryKind. +

+
+
+
+ CUpti_ActivityMemoryOperationType CUpti_ActivityMemory3::memoryOperationType [inherited]
+
+
+

The memory operation requested by the user, CUpti_ActivityMemoryOperationType. +

+
+
+
+ struct CUpti_ActivityMemory3::​PACKED_ALIGNMENT CUpti_ActivityMemory3::memoryPoolConfig [inherited]
+
+
+

The memory pool configuration used for the memory operations.

+
+
+
+ const + + char + * CUpti_ActivityMemory3::name [inherited]
+
+
+

Variable name. This name is shared across all activity records representing the same symbol, and so should not be modified. + +

+
+
+
+ uint32_t CUpti_ActivityMemory3::processId [inherited]
+
+
+

The ID of the process to which this record belongs.

+
+
+
+ uint32_t CUpti_ActivityMemory3::streamId [inherited]
+
+
+

The ID of the stream. If memory operation is not async, streamId is set to CUPTI_INVALID_STREAM_ID. +

+
+
+
+ uint64_t CUpti_ActivityMemory3::timestamp [inherited]
+
+
+

The start timestamp for the memory operation, in ns.

+
+
+
+
+
+
+

6.58. CUpti_ActivityMemory3::PACKED_ALIGNMENT Struct Reference

+
+

The memory pool configuration used for the memory operations.

+
+

Public Variables

+
+
uint64_t  address
+
+
CUpti_ActivityMemoryPoolType memoryPoolType
+
+
uint64_t  processId
+
+
uint64_t  releaseThreshold
+
+
uint64_t  size
+
+
uint64_t  utilizedSize
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemory3::​PACKED_ALIGNMENT::address [inherited]
+
+
+

The base address of the memory pool.

+
+
+
+ CUpti_ActivityMemoryPoolType CUpti_ActivityMemory3::​PACKED_ALIGNMENT::memoryPoolType [inherited]
+
+
+

The type of the memory pool, CUpti_ActivityMemoryPoolType

+
+
+
+ uint64_t CUpti_ActivityMemory3::​PACKED_ALIGNMENT::processId [inherited]
+
+
+

The processId of the memory pool. processId is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemory3::​PACKED_ALIGNMENT::releaseThreshold [inherited]
+
+
+

The release threshold of the memory pool in bytes. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemory3::​PACKED_ALIGNMENT::size [inherited]
+
+
+

The size of the memory pool in bytes. size is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemory3::​PACKED_ALIGNMENT::utilizedSize [inherited]
+
+
+

The utilized size of the memory pool. utilizedSize is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+
+
+
+

6.59. CUpti_ActivityMemoryPool Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory pool creation, destruction and trimming (CUPTI_ACTIVITY_KIND_MEMORY_POOL). This activity record provides separate records for memory pool creation, destruction and trimming operations. This allows the corresponding driver and runtime API activity record to be correlated with the memory pool operation.

+
+

Public Variables

+
+
uint64_t  address
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
CUpti_ActivityKind kind
+
+
CUpti_ActivityMemoryPoolOperationType memoryPoolOperationType
+
+
CUpti_ActivityMemoryPoolType memoryPoolType
+
+
size_t  minBytesToKeep
+
+
uint32_t  pad
+
+
uint32_t  processId
+
+
uint64_t  releaseThreshold
+
+
uint64_t  size
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemoryPool::address [inherited]
+
+
+

The virtual address of the allocation.

+
+
+
+ uint32_t CUpti_ActivityMemoryPool::correlationId [inherited]
+
+
+

The correlation ID of the memory pool operation. Each memory pool operation is assigned a unique correlation ID that is identical + to the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+
+
+
+ uint32_t CUpti_ActivityMemoryPool::deviceId [inherited]
+
+
+

The ID of the device where the memory pool is created.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemoryPool::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY_POOL

+
+
+
+ CUpti_ActivityMemoryPoolOperationType CUpti_ActivityMemoryPool::memoryPoolOperationType [inherited]
+
+
+

The memory operation requested by the user, CUpti_ActivityMemoryPoolOperationType. +

+
+
+
+ CUpti_ActivityMemoryPoolType CUpti_ActivityMemoryPool::memoryPoolType [inherited]
+
+
+

The type of the memory pool, CUpti_ActivityMemoryPoolType

+
+
+
+ size_t CUpti_ActivityMemoryPool::minBytesToKeep [inherited]
+
+
+

The minimum bytes to keep of the memory pool. minBytesToKeep is valid for CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED, CUpti_ActivityMemoryPoolOperationType

+
+
+
+ uint32_t CUpti_ActivityMemoryPool::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemoryPool::processId [inherited]
+
+
+

The ID of the process to which this record belongs.

+
+
+
+ uint64_t CUpti_ActivityMemoryPool::releaseThreshold [inherited]
+
+
+

The release threshold of the memory pool. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemoryPool::size [inherited]
+
+
+

The size of the memory pool operation in bytes. size is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemoryPool::timestamp [inherited]
+
+
+

The start timestamp for the memory operation, in ns.

+
+
+
+
+
+
+

6.60. CUpti_ActivityMemoryPool2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory pool creation, destruction and trimming (CUPTI_ACTIVITY_KIND_MEMORY_POOL). This activity record provides separate records for memory pool creation, destruction and trimming operations. This allows the corresponding driver and runtime API activity record to be correlated with the memory pool operation.

+
+

Public Variables

+
+
uint64_t  address
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
CUpti_ActivityKind kind
+
+
CUpti_ActivityMemoryPoolOperationType memoryPoolOperationType
+
+
CUpti_ActivityMemoryPoolType memoryPoolType
+
+
size_t  minBytesToKeep
+
+
uint32_t  pad
+
+
uint32_t  processId
+
+
uint64_t  releaseThreshold
+
+
uint64_t  size
+
+
uint64_t  timestamp
+
+
uint64_t  utilizedSize
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemoryPool2::address [inherited]
+
+
+

The virtual address of the allocation.

+
+
+
+ uint32_t CUpti_ActivityMemoryPool2::correlationId [inherited]
+
+
+

The correlation ID of the memory pool operation. Each memory pool operation is assigned a unique correlation ID that is identical + to the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+
+
+
+ uint32_t CUpti_ActivityMemoryPool2::deviceId [inherited]
+
+
+

The ID of the device where the memory pool is created.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemoryPool2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY_POOL

+
+
+
+ CUpti_ActivityMemoryPoolOperationType CUpti_ActivityMemoryPool2::memoryPoolOperationType [inherited]
+
+
+

The memory operation requested by the user, CUpti_ActivityMemoryPoolOperationType. +

+
+
+
+ CUpti_ActivityMemoryPoolType CUpti_ActivityMemoryPool2::memoryPoolType [inherited]
+
+
+

The type of the memory pool, CUpti_ActivityMemoryPoolType

+
+
+
+ size_t CUpti_ActivityMemoryPool2::minBytesToKeep [inherited]
+
+
+

The minimum bytes to keep of the memory pool. minBytesToKeep is valid for CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED, CUpti_ActivityMemoryPoolOperationType

+
+
+
+ uint32_t CUpti_ActivityMemoryPool2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityMemoryPool2::processId [inherited]
+
+
+

The ID of the process to which this record belongs.

+
+
+
+ uint64_t CUpti_ActivityMemoryPool2::releaseThreshold [inherited]
+
+
+

The release threshold of the memory pool. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemoryPool2::size [inherited]
+
+
+

The size of the memory pool operation in bytes. size is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+ uint64_t CUpti_ActivityMemoryPool2::timestamp [inherited]
+
+
+

The start timestamp for the memory operation, in ns.

+
+
+
+ uint64_t CUpti_ActivityMemoryPool2::utilizedSize [inherited]
+
+
+

The utilized size of the memory pool. utilizedSize is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+
+
+
+
+
+
+

6.61. CUpti_ActivityMemset Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
uint16_t  flags
+
+
CUpti_ActivityKind kind
+
+
uint16_t  memoryKind
+
+
+ void + * reserved0
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
uint32_t  value
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemset::bytes [inherited]
+
+
+

The number of bytes being set by the memory set.

+
+
+
+ uint32_t CUpti_ActivityMemset::contextId [inherited]
+
+
+

The ID of the context where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset::correlationId [inherited]
+
+
+

The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset::deviceId [inherited]
+
+
+

The ID of the device where the memory set is occurring.

+
+
+
+ uint64_t CUpti_ActivityMemset::end [inherited]
+
+
+

The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint16_t CUpti_ActivityMemset::flags [inherited]
+
+
+

The flags associated with the memset.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemset::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET.

+
+
+
+ uint16_t CUpti_ActivityMemset::memoryKind [inherited]
+
+
+

The memory kind of the memory set

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ + + void + * CUpti_ActivityMemset::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityMemset::start [inherited]
+
+
+

The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset::streamId [inherited]
+
+
+

The ID of the stream where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset::value [inherited]
+
+
+

The value being assigned to memory by the memory set.

+
+
+
+
+
+
+

6.62. CUpti_ActivityMemset2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
uint16_t  flags
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint16_t  memoryKind
+
+
+ void + * reserved0
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
uint32_t  value
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemset2::bytes [inherited]
+
+
+

The number of bytes being set by the memory set.

+
+
+
+ uint32_t CUpti_ActivityMemset2::contextId [inherited]
+
+
+

The ID of the context where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset2::correlationId [inherited]
+
+
+

The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset2::deviceId [inherited]
+
+
+

The ID of the device where the memory set is occurring.

+
+
+
+ uint64_t CUpti_ActivityMemset2::end [inherited]
+
+
+

The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint16_t CUpti_ActivityMemset2::flags [inherited]
+
+
+

The flags associated with the memset.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint64_t CUpti_ActivityMemset2::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed this memset through graph launch. This field will be 0 if the memset is not + executed through graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemset2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET.

+
+
+
+ uint16_t CUpti_ActivityMemset2::memoryKind [inherited]
+
+
+

The memory kind of the memory set

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ + + void + * CUpti_ActivityMemset2::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityMemset2::start [inherited]
+
+
+

The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset2::streamId [inherited]
+
+
+

The ID of the stream where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset2::value [inherited]
+
+
+

The value being assigned to memory by the memory set.

+
+
+
+
+
+
+

6.63. CUpti_ActivityMemset3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
uint16_t  flags
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint16_t  memoryKind
+
+
uint32_t  padding
+
+
+ void + * reserved0
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
uint32_t  value
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemset3::bytes [inherited]
+
+
+

The number of bytes being set by the memory set.

+
+
+
+ uint32_t CUpti_ActivityMemset3::contextId [inherited]
+
+
+

The ID of the context where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset3::correlationId [inherited]
+
+
+

The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset3::deviceId [inherited]
+
+
+

The ID of the device where the memory set is occurring.

+
+
+
+ uint64_t CUpti_ActivityMemset3::end [inherited]
+
+
+

The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint16_t CUpti_ActivityMemset3::flags [inherited]
+
+
+

The flags associated with the memset.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMemset3::graphId [inherited]
+
+
+

The unique ID of the graph that executed this memset through graph launch. This field will be 0 if the memset is not executed + through graph launch. +

+
+
+
+ uint64_t CUpti_ActivityMemset3::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed this memset through graph launch. This field will be 0 if the memset is not + executed through graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemset3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET.

+
+
+
+ uint16_t CUpti_ActivityMemset3::memoryKind [inherited]
+
+
+

The memory kind of the memory set

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint32_t CUpti_ActivityMemset3::padding [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemset3::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityMemset3::start [inherited]
+
+
+

The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset3::streamId [inherited]
+
+
+

The ID of the stream where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset3::value [inherited]
+
+
+

The value being assigned to memory by the memory set.

+
+
+
+
+
+
+

6.64. CUpti_ActivityMemset4 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  channelID
+
+
CUpti_ChannelType  channelType
+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  deviceId
+
+
uint64_t  end
+
+
uint16_t  flags
+
+
uint32_t  graphId
+
+
uint64_t  graphNodeId
+
+
CUpti_ActivityKind kind
+
+
uint16_t  memoryKind
+
+
uint32_t  pad2
+
+
+ void + * reserved0
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
uint32_t  value
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityMemset4::bytes [inherited]
+
+
+

The number of bytes being set by the memory set.

+
+
+
+ uint32_t CUpti_ActivityMemset4::channelID [inherited]
+
+
+

The ID of the HW channel on which the memory set is occurring.

+
+
+
+ CUpti_ChannelType CUpti_ActivityMemset4::channelType [inherited]
+
+
+

The type of the channel

+
+
+
+ uint32_t CUpti_ActivityMemset4::contextId [inherited]
+
+
+

The ID of the context where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset4::correlationId [inherited]
+
+
+

The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation + ID in the driver API activity record that launched the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset4::deviceId [inherited]
+
+
+

The ID of the device where the memory set is occurring.

+
+
+
+ uint64_t CUpti_ActivityMemset4::end [inherited]
+
+
+

The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint16_t CUpti_ActivityMemset4::flags [inherited]
+
+
+

The flags associated with the memset.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ uint32_t CUpti_ActivityMemset4::graphId [inherited]
+
+
+

The unique ID of the graph that executed this memset through graph launch. This field will be 0 if the memset is not executed + through graph launch. +

+
+
+
+ uint64_t CUpti_ActivityMemset4::graphNodeId [inherited]
+
+
+

The unique ID of the graph node that executed this memset through graph launch. This field will be 0 if the memset is not + executed through graph launch. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMemset4::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET.

+
+
+
+ uint16_t CUpti_ActivityMemset4::memoryKind [inherited]
+
+
+

The memory kind of the memory set

+

See also:

+

CUpti_ActivityMemoryKind

+

+
+
+
+ uint32_t CUpti_ActivityMemset4::pad2 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ + + void + * CUpti_ActivityMemset4::reserved0 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityMemset4::start [inherited]
+
+
+

The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the memory set. +

+
+
+
+ uint32_t CUpti_ActivityMemset4::streamId [inherited]
+
+
+

The ID of the stream where the memory set is occurring.

+
+
+
+ uint32_t CUpti_ActivityMemset4::value [inherited]
+
+
+

The value being assigned to memory by the memory set.

+
+
+
+
+
+
+

6.65. CUpti_ActivityMetric Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents the collection of a CUPTI metric value (CUPTI_ACTIVITY_KIND_METRIC). This activity record + kind is not produced by the activity API but is included for completeness and ease-of-use. Profile frameworks built on top + of CUPTI that collect metric data may choose to use this type to store the collected metric data. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint8_t  flags
+
+
CUpti_MetricID id
+
+
CUpti_ActivityKind kind
+
+
uint8_t  pad[3]
+
+
union CUpti_MetricValue value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityMetric::correlationId [inherited]
+
+
+

The correlation ID of the metric. Use of this ID is user-defined, but typically this ID value will equal the correlation + ID of the kernel for which the metric was gathered. +

+
+
+
+ uint8_t CUpti_ActivityMetric::flags [inherited]
+
+
+

The properties of this metric.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_MetricID CUpti_ActivityMetric::id [inherited]
+
+
+

The metric ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMetric::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_METRIC.

+
+
+
+ uint8_t CUpti_ActivityMetric::pad[3] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ union CUpti_MetricValue CUpti_ActivityMetric::value [inherited]
+
+
+

The metric value.

+
+
+
+
+
+
+

6.66. CUpti_ActivityMetricInstance Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUPTI metric value for a specific metric domain instance (CUPTI_ACTIVITY_KIND_METRIC_INSTANCE). + This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profile frameworks + built on top of CUPTI that collect metric data may choose to use this type to store the collected metric data. This activity + record should be used when metric domain instance information needs to be associated with the metric. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint8_t  flags
+
+
CUpti_MetricID id
+
+
uint32_t  instance
+
+
CUpti_ActivityKind kind
+
+
uint8_t  pad[7]
+
+
union CUpti_MetricValue value
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityMetricInstance::correlationId [inherited]
+
+
+

The correlation ID of the metric. Use of this ID is user-defined, but typically this ID value will equal the correlation + ID of the kernel for which the metric was gathered. +

+
+
+
+ uint8_t CUpti_ActivityMetricInstance::flags [inherited]
+
+
+

The properties of this metric.

+

See also:

+

CUpti_ActivityFlag

+

+
+
+
+ CUpti_MetricID CUpti_ActivityMetricInstance::id [inherited]
+
+
+

The metric ID.

+
+
+
+ uint32_t CUpti_ActivityMetricInstance::instance [inherited]
+
+
+

The metric domain instance.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityMetricInstance::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_METRIC_INSTANCE.

+
+
+
+ uint8_t CUpti_ActivityMetricInstance::pad[7] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ union CUpti_MetricValue CUpti_ActivityMetricInstance::value [inherited]
+
+
+

The metric value.

+
+
+
+
+
+
+

6.67. CUpti_ActivityModule Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a CUDA module (CUPTI_ACTIVITY_KIND_MODULE). This activity record kind is not produced by the + activity API but is included for completeness and ease-of-use. Profile frameworks built on top of CUPTI that collect module + data from the module callback may choose to use this type to store the collected module data. +

+
+

Public Variables

+
+
uint32_t  contextId
+
+
const + void + * cubin
+
+
uint32_t  cubinSize
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityModule::contextId [inherited]
+
+
+

The ID of the context where the module is loaded.

+
+
+
+ const + + void + * CUpti_ActivityModule::cubin [inherited]
+
+
+

The pointer to cubin.

+
+
+
+ uint32_t CUpti_ActivityModule::cubinSize [inherited]
+
+
+

The cubin size.

+
+
+
+ uint32_t CUpti_ActivityModule::id [inherited]
+
+
+

The module ID.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityModule::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_MODULE.

+
+
+
+ uint32_t CUpti_ActivityModule::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+
+
+
+

6.68. CUpti_ActivityName Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record provides a name for a device, context, thread, etc. and other resource naming done via NVTX APIs (CUPTI_ACTIVITY_KIND_NAME). + +

+
+

Public Variables

+
+
CUpti_ActivityKind kind
+
+
const + char + * name
+
+
union CUpti_ActivityObjectKindId objectId
+
+
CUpti_ActivityObjectKind objectKind
+
+
+
+

Variables

+
+
+ CUpti_ActivityKind CUpti_ActivityName::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_NAME.

+
+
+
+ const + + char + * CUpti_ActivityName::name [inherited]
+
+
+

The name.

+
+
+
+ union CUpti_ActivityObjectKindId CUpti_ActivityName::objectId [inherited]
+
+
+

The identifier for the activity object. 'objectKind' indicates which ID is valid for this record.

+
+
+
+ CUpti_ActivityObjectKind CUpti_ActivityName::objectKind [inherited]
+
+
+

The kind of activity object being named.

+
+
+
+
+
+ +
+

6.70. CUpti_ActivityNvLink2 Struct Reference

+

[CUPTI Activity API] +

+
+

This structure gives capabilities of each logical NVLink connection between two devices, gpu<->gpu or gpu<->CPU which can + be used to understand the topology. NvLink information is now reported using the CUpti_ActivityNvLink4 activity record. +

+
+

Public Variables

+
+
uint64_t  bandwidth
+
+
uint32_t  domainId
+
+
uint32_t  flag
+
+
CUpti_ActivityNvLink2::@32  idDev0
+
+
CUpti_ActivityNvLink2::@33  idDev1
+
+
uint32_t  index
+
+
CUpti_ActivityKind kind
+
+
uint32_t  nvlinkVersion
+
+
uint32_t  physicalNvLinkCount
+
+
int8_t  portDev0[CUPTI_MAX_NVLINK_PORTS]
+
+
int8_t  portDev1[CUPTI_MAX_NVLINK_PORTS]
+
+
CUpti_DevType typeDev0
+
+
CUpti_DevType typeDev1
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityNvLink2::bandwidth [inherited]
+
+
+

Bandwidth of NVLink in kbytes/sec

+
+
+
+ uint32_t CUpti_ActivityNvLink2::domainId [inherited]
+
+
+

Domain ID of NPU. On Linux, this can be queried using lspci.

+
+
+
+ uint32_t CUpti_ActivityNvLink2::flag [inherited]
+
+
+

Flag gives capabilities of the link

+

See also:

+

CUpti_LinkFlag

+

+
+
+
+ CUpti_ActivityNvLink2::@32 CUpti_ActivityNvLink2::idDev0 [inherited]
+
+
+

If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. See CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+
+
+
+ CUpti_ActivityNvLink2::@33 CUpti_ActivityNvLink2::idDev1 [inherited]
+
+
+

If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. See CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+
+
+
+ uint32_t CUpti_ActivityNvLink2::index [inherited]
+
+
+

Index of the NPU. First index will always be zero.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityNvLink2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK.

+
+
+
+ uint32_t CUpti_ActivityNvLink2::nvlinkVersion [inherited]
+
+
+

NvLink version.

+
+
+
+ uint32_t CUpti_ActivityNvLink2::physicalNvLinkCount [inherited]
+
+
+

Number of physical NVLinks present between two devices.

+
+
+
+ int8_t CUpti_ActivityNvLink2::portDev0[CUPTI_MAX_NVLINK_PORTS] [inherited]
+
+
+

Port numbers for maximum 16 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case + of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate + the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+
+
+
+ int8_t CUpti_ActivityNvLink2::portDev1[CUPTI_MAX_NVLINK_PORTS] [inherited]
+
+
+

Port numbers for maximum 16 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case + of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate + the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+
+
+
+ CUpti_DevType CUpti_ActivityNvLink2::typeDev0 [inherited]
+
+
+

Type of device 0 CUpti_DevType

+
+
+
+ CUpti_DevType CUpti_ActivityNvLink2::typeDev1 [inherited]
+
+
+

Type of device 1 CUpti_DevType

+
+
+
+
+
+
+

6.71. CUpti_ActivityNvLink3 Struct Reference

+

[CUPTI Activity API] +

+
+

This structure gives capabilities of each logical NVLink connection between two devices, gpu<->gpu or gpu<->CPU which can + be used to understand the topology. NvLink information is now reported using the CUpti_ActivityNvLink4 activity record. +

+
+

Public Variables

+
+
uint64_t  bandwidth
+
+
uint32_t  domainId
+
+
uint32_t  flag
+
+
CUpti_ActivityNvLink3::@36  idDev0
+
+
CUpti_ActivityNvLink3::@37  idDev1
+
+
uint32_t  index
+
+
CUpti_ActivityKind kind
+
+
uint32_t  nvlinkVersion
+
+
uint8_t  nvswitchConnected
+
+
uint8_t  pad[7]
+
+
uint32_t  physicalNvLinkCount
+
+
int8_t  portDev0[CUPTI_MAX_NVLINK_PORTS]
+
+
int8_t  portDev1[CUPTI_MAX_NVLINK_PORTS]
+
+
CUpti_DevType typeDev0
+
+
CUpti_DevType typeDev1
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityNvLink3::bandwidth [inherited]
+
+
+

Bandwidth of NVLink in kbytes/sec

+
+
+
+ uint32_t CUpti_ActivityNvLink3::domainId [inherited]
+
+
+

Domain ID of NPU. On Linux, this can be queried using lspci.

+
+
+
+ uint32_t CUpti_ActivityNvLink3::flag [inherited]
+
+
+

Flag gives capabilities of the link

+

See also:

+

CUpti_LinkFlag

+

+
+
+
+ CUpti_ActivityNvLink3::@36 CUpti_ActivityNvLink3::idDev0 [inherited]
+
+
+

If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. See CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+
+
+
+ CUpti_ActivityNvLink3::@37 CUpti_ActivityNvLink3::idDev1 [inherited]
+
+
+

If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. See CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+
+
+
+ uint32_t CUpti_ActivityNvLink3::index [inherited]
+
+
+

Index of the NPU. First index will always be zero.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityNvLink3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK.

+
+
+
+ uint32_t CUpti_ActivityNvLink3::nvlinkVersion [inherited]
+
+
+

NvLink version.

+
+
+
+ uint8_t CUpti_ActivityNvLink3::nvswitchConnected [inherited]
+
+
+

NVSwitch is connected as an intermediate node.

+
+
+
+ uint8_t CUpti_ActivityNvLink3::pad[7] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityNvLink3::physicalNvLinkCount [inherited]
+
+
+

Number of physical NVLinks present between two devices.

+
+
+
+ int8_t CUpti_ActivityNvLink3::portDev0[CUPTI_MAX_NVLINK_PORTS] [inherited]
+
+
+

Port numbers for maximum 16 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case + of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate + the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+
+
+
+ int8_t CUpti_ActivityNvLink3::portDev1[CUPTI_MAX_NVLINK_PORTS] [inherited]
+
+
+

Port numbers for maximum 16 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case + of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate + the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+
+
+
+ CUpti_DevType CUpti_ActivityNvLink3::typeDev0 [inherited]
+
+
+

Type of device 0 CUpti_DevType

+
+
+
+ CUpti_DevType CUpti_ActivityNvLink3::typeDev1 [inherited]
+
+
+

Type of device 1 CUpti_DevType

+
+
+
+
+
+
+

6.72. CUpti_ActivityNvLink4 Struct Reference

+

[CUPTI Activity API] +

+
+

This structure gives capabilities of each logical NVLink connection between two devices, gpu<->gpu or gpu<->CPU which can + be used to understand the topology. +

+
+

Public Variables

+
+
uint64_t  bandwidth
+
+
uint32_t  domainId
+
+
uint32_t  flag
+
+
CUpti_ActivityNvLink4::@40  idDev0
+
+
CUpti_ActivityNvLink4::@41  idDev1
+
+
uint32_t  index
+
+
CUpti_ActivityKind kind
+
+
uint32_t  nvlinkVersion
+
+
uint8_t  nvswitchConnected
+
+
uint8_t  pad[7]
+
+
uint32_t  physicalNvLinkCount
+
+
int8_t  portDev0[CUPTI_MAX_NVLINK_PORTS]
+
+
int8_t  portDev1[CUPTI_MAX_NVLINK_PORTS]
+
+
CUpti_DevType typeDev0
+
+
CUpti_DevType typeDev1
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityNvLink4::bandwidth [inherited]
+
+
+

Bandwidth of NVLink in kbytes/sec

+
+
+
+ uint32_t CUpti_ActivityNvLink4::domainId [inherited]
+
+
+

Domain ID of NPU. On Linux, this can be queried using lspci.

+
+
+
+ uint32_t CUpti_ActivityNvLink4::flag [inherited]
+
+
+

Flag gives capabilities of the link

+

See also:

+

CUpti_LinkFlag

+

+
+
+
+ CUpti_ActivityNvLink4::@40 CUpti_ActivityNvLink4::idDev0 [inherited]
+
+
+

If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. See CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+
+
+
+ CUpti_ActivityNvLink4::@41 CUpti_ActivityNvLink4::idDev1 [inherited]
+
+
+

If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. See CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+
+
+
+ uint32_t CUpti_ActivityNvLink4::index [inherited]
+
+
+

Index of the NPU. First index will always be zero.

+
+
+
+ CUpti_ActivityKind CUpti_ActivityNvLink4::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK.

+
+
+
+ uint32_t CUpti_ActivityNvLink4::nvlinkVersion [inherited]
+
+
+

NvLink version.

+
+
+
+ uint8_t CUpti_ActivityNvLink4::nvswitchConnected [inherited]
+
+
+

NVSwitch is connected as an intermediate node.

+
+
+
+ uint8_t CUpti_ActivityNvLink4::pad[7] [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityNvLink4::physicalNvLinkCount [inherited]
+
+
+

Number of physical NVLinks present between two devices.

+
+
+
+ int8_t CUpti_ActivityNvLink4::portDev0[CUPTI_MAX_NVLINK_PORTS] [inherited]
+
+
+

Port numbers for maximum 32 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case + of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate + the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+
+
+
+ int8_t CUpti_ActivityNvLink4::portDev1[CUPTI_MAX_NVLINK_PORTS] [inherited]
+
+
+

Port numbers for maximum 32 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case + of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate + the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+
+
+
+ CUpti_DevType CUpti_ActivityNvLink4::typeDev0 [inherited]
+
+
+

Type of device 0 CUpti_DevType

+
+
+
+ CUpti_DevType CUpti_ActivityNvLink4::typeDev1 [inherited]
+
+
+

Type of device 1 CUpti_DevType

+
+
+
+
+
+
+

6.73. CUpti_ActivityObjectKindId Union Reference

+

[CUPTI Activity API] +

+
+

See also:

+

CUpti_ActivityObjectKind

+

+
+

Public Variables

+
+
CUpti_ActivityObjectKindId::@1  dcs
+
+
CUpti_ActivityObjectKindId::@0  pt
+
+
+
+

Variables

+
+
+ CUpti_ActivityObjectKindId::@1 CUpti_ActivityObjectKindId::dcs [inherited]
+
+
+

A device object requires that we identify the device ID. A context object requires that we identify both the device and context + ID. A stream object requires that we identify device, context, and stream ID. +

+
+
+
+ CUpti_ActivityObjectKindId::@0 CUpti_ActivityObjectKindId::pt [inherited]
+
+
+

A process object requires that we identify the process ID. A thread object requires that we identify both the process and + thread ID. +

+
+
+
+
+
+
+

6.74. CUpti_ActivityOpenAcc Struct Reference

+

[CUPTI Activity API] +

+
+

The OpenACC activity API part uses a CUpti_ActivityOpenAcc as a generic representation for any OpenACC activity. The 'kind' field is used to determine the specific activity kind, and + from that the CUpti_ActivityOpenAcc object can be cast to the specific OpenACC activity record type appropriate for that kind. +

+

Note that all OpenACC activity record types are padded and aligned to ensure that each member of the record is naturally aligned.

+

See also:

+

CUpti_ActivityKind

+

+
+

Public Variables

+
+
uint32_t  cuContextId
+
+
uint32_t  cuDeviceId
+
+
uint32_t  cuProcessId
+
+
uint32_t  cuStreamId
+
+
uint32_t  cuThreadId
+
+
uint64_t  end
+
+
CUpti_OpenAccEventKind eventKind
+
+
uint32_t  externalId
+
+
CUpti_ActivityKind kind
+
+
CUpti_OpenAccConstructKind parentConstruct
+
+
uint64_t  start
+
+
uint32_t  threadId
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityOpenAcc::cuContextId [inherited]
+
+
+

CUDA context id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAcc::cuDeviceId [inherited]
+
+
+

CUDA device id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAcc::cuProcessId [inherited]
+
+
+

The ID of the process where the OpenACC activity is executing.

+
+
+
+ uint32_t CUpti_ActivityOpenAcc::cuStreamId [inherited]
+
+
+

CUDA stream id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAcc::cuThreadId [inherited]
+
+
+

The ID of the thread where the OpenACC activity is executing.

+
+
+
+ uint64_t CUpti_ActivityOpenAcc::end [inherited]
+
+
+

CUPTI end timestamp

+
+
+
+ CUpti_OpenAccEventKind CUpti_ActivityOpenAcc::eventKind [inherited]
+
+
+

CUPTI OpenACC event kind (

+

See also:

+

CUpti_OpenAccEventKind) +

+

+
+
+
+ uint32_t CUpti_ActivityOpenAcc::externalId [inherited]
+
+
+

The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. + It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. + +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityOpenAcc::kind [inherited]
+
+
+

The kind of this activity.

+
+
+
+ CUpti_OpenAccConstructKind CUpti_ActivityOpenAcc::parentConstruct [inherited]
+
+
+

CUPTI OpenACC parent construct kind (

+

See also:

+

CUpti_OpenAccConstructKind) +

+

+ Note that for applications using PGI OpenACC runtime < 16.1, this will always be CUPTI_OPENACC_CONSTRUCT_KIND_UNKNOWN. +

+
+
+
+ uint64_t CUpti_ActivityOpenAcc::start [inherited]
+
+
+

CUPTI start timestamp

+
+
+
+ uint32_t CUpti_ActivityOpenAcc::threadId [inherited]
+
+
+

ThreadId

+
+
+
+
+
+
+

6.75. CUpti_ActivityOpenAccData Struct Reference

+

[CUPTI Activity API] +

+
+

(CUPTI_ACTIVITY_KIND_OPENACC_DATA).

+
+

Public Variables

+
+
uint64_t  bytes
+
+
uint32_t  cuContextId
+
+
uint32_t  cuDeviceId
+
+
uint32_t  cuProcessId
+
+
uint32_t  cuStreamId
+
+
uint32_t  cuThreadId
+
+
uint64_t  devicePtr
+
+
uint64_t  end
+
+
CUpti_OpenAccEventKind eventKind
+
+
uint32_t  externalId
+
+
uint64_t  hostPtr
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad1
+
+
uint64_t  start
+
+
uint32_t  threadId
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityOpenAccData::bytes [inherited]
+
+
+

Number of bytes

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::cuContextId [inherited]
+
+
+

CUDA context id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::cuDeviceId [inherited]
+
+
+

CUDA device id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::cuProcessId [inherited]
+
+
+

The ID of the process where the OpenACC activity is executing.

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::cuStreamId [inherited]
+
+
+

CUDA stream id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::cuThreadId [inherited]
+
+
+

The ID of the thread where the OpenACC activity is executing.

+
+
+
+ uint64_t CUpti_ActivityOpenAccData::devicePtr [inherited]
+
+
+

Device pointer if available

+
+
+
+ uint64_t CUpti_ActivityOpenAccData::end [inherited]
+
+
+

CUPTI end timestamp

+
+
+
+ CUpti_OpenAccEventKind CUpti_ActivityOpenAccData::eventKind [inherited]
+
+
+

CUPTI OpenACC event kind (

+

See also:

+

CUpti_OpenAccEventKind) +

+

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::externalId [inherited]
+
+
+

The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. + It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. + +

+
+
+
+ uint64_t CUpti_ActivityOpenAccData::hostPtr [inherited]
+
+
+

Host pointer if available

+
+
+
+ CUpti_ActivityKind CUpti_ActivityOpenAccData::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_OPENACC_DATA.

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::pad1 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityOpenAccData::start [inherited]
+
+
+

CUPTI start timestamp

+
+
+
+ uint32_t CUpti_ActivityOpenAccData::threadId [inherited]
+
+
+

ThreadId

+
+
+
+
+
+
+

6.76. CUpti_ActivityOpenAccLaunch Struct Reference

+

[CUPTI Activity API] +

+
+

(CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH).

+
+

Public Variables

+
+
uint32_t  cuContextId
+
+
uint32_t  cuDeviceId
+
+
uint32_t  cuProcessId
+
+
uint32_t  cuStreamId
+
+
uint32_t  cuThreadId
+
+
uint64_t  end
+
+
CUpti_OpenAccEventKind eventKind
+
+
uint32_t  externalId
+
+
CUpti_ActivityKind kind
+
+
uint64_t  numGangs
+
+
uint64_t  numWorkers
+
+
uint32_t  pad1
+
+
uint64_t  start
+
+
uint32_t  threadId
+
+
uint64_t  vectorLength
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::cuContextId [inherited]
+
+
+

CUDA context id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::cuDeviceId [inherited]
+
+
+

CUDA device id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::cuProcessId [inherited]
+
+
+

The ID of the process where the OpenACC activity is executing.

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::cuStreamId [inherited]
+
+
+

CUDA stream id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::cuThreadId [inherited]
+
+
+

The ID of the thread where the OpenACC activity is executing.

+
+
+
+ uint64_t CUpti_ActivityOpenAccLaunch::end [inherited]
+
+
+

CUPTI end timestamp

+
+
+
+ CUpti_OpenAccEventKind CUpti_ActivityOpenAccLaunch::eventKind [inherited]
+
+
+

CUPTI OpenACC event kind (

+

See also:

+

CUpti_OpenAccEventKind) +

+

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::externalId [inherited]
+
+
+

The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. + It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. + +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityOpenAccLaunch::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH.

+
+
+
+ uint64_t CUpti_ActivityOpenAccLaunch::numGangs [inherited]
+
+
+

The number of gangs created for this kernel launch

+
+
+
+ uint64_t CUpti_ActivityOpenAccLaunch::numWorkers [inherited]
+
+
+

The number of workers created for this kernel launch

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::pad1 [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint64_t CUpti_ActivityOpenAccLaunch::start [inherited]
+
+
+

CUPTI start timestamp

+
+
+
+ uint32_t CUpti_ActivityOpenAccLaunch::threadId [inherited]
+
+
+

ThreadId

+
+
+
+ uint64_t CUpti_ActivityOpenAccLaunch::vectorLength [inherited]
+
+
+

The number of vector lanes created for this kernel launch

+
+
+
+
+
+
+

6.77. CUpti_ActivityOpenAccOther Struct Reference

+

[CUPTI Activity API] +

+
+

(CUPTI_ACTIVITY_KIND_OPENACC_OTHER).

+
+

Public Variables

+
+
uint32_t  cuContextId
+
+
uint32_t  cuDeviceId
+
+
uint32_t  cuProcessId
+
+
uint32_t  cuStreamId
+
+
uint32_t  cuThreadId
+
+
uint64_t  end
+
+
CUpti_OpenAccEventKind eventKind
+
+
uint32_t  externalId
+
+
CUpti_ActivityKind kind
+
+
uint64_t  start
+
+
uint32_t  threadId
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityOpenAccOther::cuContextId [inherited]
+
+
+

CUDA context id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccOther::cuDeviceId [inherited]
+
+
+

CUDA device id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccOther::cuProcessId [inherited]
+
+
+

The ID of the process where the OpenACC activity is executing.

+
+
+
+ uint32_t CUpti_ActivityOpenAccOther::cuStreamId [inherited]
+
+
+

CUDA stream id. Valid only if deviceType is acc_device_nvidia.

+
+
+
+ uint32_t CUpti_ActivityOpenAccOther::cuThreadId [inherited]
+
+
+

The ID of the thread where the OpenACC activity is executing.

+
+
+
+ uint64_t CUpti_ActivityOpenAccOther::end [inherited]
+
+
+

CUPTI end timestamp

+
+
+
+ CUpti_OpenAccEventKind CUpti_ActivityOpenAccOther::eventKind [inherited]
+
+
+

CUPTI OpenACC event kind (

+

See also:

+

CUpti_OpenAccEventKind) +

+

+
+
+
+ uint32_t CUpti_ActivityOpenAccOther::externalId [inherited]
+
+
+

The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. + It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. + +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityOpenAccOther::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_OPENACC_OTHER.

+
+
+
+ uint64_t CUpti_ActivityOpenAccOther::start [inherited]
+
+
+

CUPTI start timestamp

+
+
+
+ uint32_t CUpti_ActivityOpenAccOther::threadId [inherited]
+
+
+

ThreadId

+
+
+
+
+
+
+

6.78. CUpti_ActivityOpenMp Struct Reference

+

[CUPTI Activity API] +

+
+

See also:

+

CUpti_ActivityKind

+

+
+

Public Variables

+
+
uint32_t  cuProcessId
+
+
uint32_t  cuThreadId
+
+
uint64_t  end
+
+
CUpti_OpenMpEventKind  eventKind
+
+
CUpti_ActivityKind kind
+
+
uint64_t  start
+
+
uint32_t  threadId
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityOpenMp::cuProcessId [inherited]
+
+
+

The ID of the process where the OpenMP activity is executing.

+
+
+
+ uint32_t CUpti_ActivityOpenMp::cuThreadId [inherited]
+
+
+

The ID of the thread where the OpenMP activity is executing.

+
+
+
+ uint64_t CUpti_ActivityOpenMp::end [inherited]
+
+
+

CUPTI end timestamp

+
+
+
+ CUpti_OpenMpEventKind CUpti_ActivityOpenMp::eventKind [inherited]
+
+
+

CUPTI OpenMP event kind (

+

See also:

+

CUpti_OpenMpEventKind)

+

+
+
+
+ CUpti_ActivityKind CUpti_ActivityOpenMp::kind [inherited]
+
+
+

The kind of this activity.

+
+
+
+ uint64_t CUpti_ActivityOpenMp::start [inherited]
+
+
+

CUPTI start timestamp

+
+
+
+ uint32_t CUpti_ActivityOpenMp::threadId [inherited]
+
+
+

ThreadId

+
+
+
+
+
+
+

6.79. CUpti_ActivityOverhead Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record provides CUPTI and driver overhead information (CUPTI_ACTIVITY_KIND_OVERHEAD).

+
+

Public Variables

+
+
uint64_t  end
+
+
CUpti_ActivityKind kind
+
+
union CUpti_ActivityObjectKindId objectId
+
+
CUpti_ActivityObjectKind objectKind
+
+
CUpti_ActivityOverheadKind overheadKind
+
+
uint64_t  start
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityOverhead::end [inherited]
+
+
+

The end timestamp for the overhead, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information + could not be collected for the overhead. +

+
+
+
+ CUpti_ActivityKind CUpti_ActivityOverhead::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_OVERHEAD.

+
+
+
+ union CUpti_ActivityObjectKindId CUpti_ActivityOverhead::objectId [inherited]
+
+
+

The identifier for the activity object. 'objectKind' indicates which ID is valid for this record.

+
+
+
+ CUpti_ActivityObjectKind CUpti_ActivityOverhead::objectKind [inherited]
+
+
+

The kind of activity object that the overhead is associated with.

+
+
+
+ CUpti_ActivityOverheadKind CUpti_ActivityOverhead::overheadKind [inherited]
+
+
+

The kind of overhead, CUPTI, DRIVER, COMPILER etc.

+
+
+
+ uint64_t CUpti_ActivityOverhead::start [inherited]
+
+
+

The start timestamp for the overhead, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the overhead. +

+
+
+
+
+
+
+

6.80. CUpti_ActivityPcie Struct Reference

+

[CUPTI Activity API] +

+
+

This structure gives capabilities of GPU and PCI bridge connected to the PCIE bus which can be used to understand the topology. + +

+
+

Public Variables

+
+
CUpti_ActivityPcie::@45  attr
+
+
uint32_t  bridgeId
+
+
CUdevice  devId
+
+
uint16_t  deviceId
+
+
uint32_t  domain
+
+
CUpti_ActivityPcie::@44  id
+
+
CUpti_ActivityKind kind
+
+
uint16_t  linkRate
+
+
uint16_t  linkWidth
+
+
uint16_t  pad0
+
+
uint16_t  pcieGeneration
+
+
CUdevice  peerDev[CUPTI_MAX_GPUS]
+
+
uint16_t  secondaryBus
+
+
CUpti_PcieDeviceType type
+
+
uint16_t  upstreamBus
+
+
CUuuid  uuidDev
+
+
uint16_t  vendorId
+
+
+
+

Variables

+
+
+ CUpti_ActivityPcie::@45 CUpti_ActivityPcie::attr [inherited]
+
+
+

Attributes for more information about GPU (gpuAttr) or PCI Bridge (bridgeAttr)

+
+
+
+ uint32_t CUpti_ActivityPcie::bridgeId [inherited]
+
+
+

A unique identifier for Bridge in the Topology

+
+
+
+ CUdevice CUpti_ActivityPcie::devId [inherited]
+
+
+

GPU device ID

+
+
+
+ uint16_t CUpti_ActivityPcie::deviceId [inherited]
+
+
+

Device ID of the bridge

+
+
+
+ uint32_t CUpti_ActivityPcie::domain [inherited]
+
+
+

Domain for the GPU or Bridge, required to identify which PCIE bus it belongs to in multiple NUMA systems.

+
+
+
+ CUpti_ActivityPcie::@44 CUpti_ActivityPcie::id [inherited]
+
+
+

A unique identifier for GPU or Bridge in Topology

+
+
+
+ CUpti_ActivityKind CUpti_ActivityPcie::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_PCIE.

+
+
+
+ uint16_t CUpti_ActivityPcie::linkRate [inherited]
+
+
+

Link rate of the GPU or bridge in gigatransfers per second (GT/s)

+
+
+
+ uint16_t CUpti_ActivityPcie::linkWidth [inherited]
+
+
+

Link width of the GPU or bridge

+
+
+
+ uint16_t CUpti_ActivityPcie::pad0 [inherited]
+
+
+

Padding for alignment

+
+
+
+ uint16_t CUpti_ActivityPcie::pcieGeneration [inherited]
+
+
+

PCIE Generation of GPU or Bridge.

+
+
+
+ CUdevice CUpti_ActivityPcie::peerDev[CUPTI_MAX_GPUS] [inherited]
+
+
+

CUdevice with which this device has P2P capability. This can also be obtained by querying the cuDeviceCanAccessPeer or cudaDeviceCanAccessPeer + APIs. +

+
+
+
+ uint16_t CUpti_ActivityPcie::secondaryBus [inherited]
+
+
+

The downstream bus number, used to search downstream devices/bridges connected to this bridge.

+
+
+
+ CUpti_PcieDeviceType CUpti_ActivityPcie::type [inherited]
+
+
+

Type of device in topology (see CUpti_PcieDeviceType). If type is CUPTI_PCIE_DEVICE_TYPE_GPU, use devId for id and gpuAttr; if type is CUPTI_PCIE_DEVICE_TYPE_BRIDGE, use bridgeId + for id and bridgeAttr. +

+
+
+
+ uint16_t CUpti_ActivityPcie::upstreamBus [inherited]
+
+
+

Upstream bus ID for the GPU or PCI bridge. Required to identify which bus it is connected to in the topology.

+
+
+
+ CUuuid CUpti_ActivityPcie::uuidDev [inherited]
+
+
+

UUID for the device. See CUpti_ActivityDevice4. +

+
+
+
+ uint16_t CUpti_ActivityPcie::vendorId [inherited]
+
+
+

Vendor ID of the bridge

+
+
+
+
+
+
+

6.81. CUpti_ActivityPCSampling Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING). PC sampling activities are now + reported using the CUpti_ActivityPCSampling2 activity record. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pcOffset
+
+
uint32_t  samples
+
+
uint32_t  sourceLocatorId
+
+
CUpti_ActivityPCSamplingStallReason stallReason
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityPCSampling::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityPCSampling::flags [inherited]
+
+
+

The properties of this instruction.

+
+
+
+ uint32_t CUpti_ActivityPCSampling::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKind CUpti_ActivityPCSampling::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING.

+
+
+
+ uint32_t CUpti_ActivityPCSampling::pcOffset [inherited]
+
+
+

The pc offset for the instruction.

+
+
+
+ uint32_t CUpti_ActivityPCSampling::samples [inherited]
+
+
+

Number of times the PC was sampled with the stallReason in the record. The same PC can be sampled with different stall reasons. + +

+
+
+
+ uint32_t CUpti_ActivityPCSampling::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ CUpti_ActivityPCSamplingStallReason CUpti_ActivityPCSampling::stallReason [inherited]
+
+
+

Current stall reason. Includes one of the reasons from CUpti_ActivityPCSamplingStallReason

+
+
+
+
+
+
+

6.82. CUpti_ActivityPCSampling2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING). PC sampling activities are now + reported using the CUpti_ActivityPCSampling3 activity record. +

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  latencySamples
+
+
uint32_t  pcOffset
+
+
uint32_t  samples
+
+
uint32_t  sourceLocatorId
+
+
CUpti_ActivityPCSamplingStallReason stallReason
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityPCSampling2::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ CUpti_ActivityFlag CUpti_ActivityPCSampling2::flags [inherited]
+
+
+

The properties of this instruction.

+
+
+
+ uint32_t CUpti_ActivityPCSampling2::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKind CUpti_ActivityPCSampling2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING.

+
+
+
+ uint32_t CUpti_ActivityPCSampling2::latencySamples [inherited]
+
+
+

Number of times the PC was sampled with the stallReason in the record. These samples indicate that no instruction was issued + in that cycle from the warp scheduler from where the warp was sampled. This field is valid for devices with compute capability + 6.0 and higher. +

+
+
+
+ uint32_t CUpti_ActivityPCSampling2::pcOffset [inherited]
+
+
+

The pc offset for the instruction.

+
+
+
+ uint32_t CUpti_ActivityPCSampling2::samples [inherited]
+
+
+

Number of times the PC was sampled with the stallReason in the record. The same PC can be sampled with different stall reasons. + The count includes latencySamples. +

+
+
+
+ uint32_t CUpti_ActivityPCSampling2::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ CUpti_ActivityPCSamplingStallReason CUpti_ActivityPCSampling2::stallReason [inherited]
+
+
+

Current stall reason. Includes one of the reasons from CUpti_ActivityPCSamplingStallReason

+
+
+
+
+
+
+

6.83. CUpti_ActivityPCSampling3 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING).

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  latencySamples
+
+
uint64_t  pcOffset
+
+
uint32_t  samples
+
+
uint32_t  sourceLocatorId
+
+
CUpti_ActivityPCSamplingStallReason stallReason
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityPCSampling3::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ CUpti_ActivityFlagCUpti_ActivityPCSampling3::flags [inherited]
+
+
+

The properties of this instruction.

+
+
+
+ uint32_t CUpti_ActivityPCSampling3::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivityPCSampling3::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING.

+
+
+
+ uint32_t CUpti_ActivityPCSampling3::latencySamples [inherited]
+
+
+

Number of times the PC was sampled with the stallReason in the record. These samples indicate that no instruction was issued + in that cycle from the warp scheduler from where the warp was sampled. Field is valid for devices with compute capability + 6.0 and higher +

+
+
+
+ uint64_t CUpti_ActivityPCSampling3::pcOffset [inherited]
+
+
+

The pc offset for the instruction.

+
+
+
+ uint32_t CUpti_ActivityPCSampling3::samples [inherited]
+
+
+

Number of times the PC was sampled with the stallReason in the record. The same PC can be sampled with different stall reasons. + The count includes latencySamples. +

+
+
+
+ uint32_t CUpti_ActivityPCSampling3::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ CUpti_ActivityPCSamplingStallReasonCUpti_ActivityPCSampling3::stallReason [inherited]
+
+
+

Current stall reason. Includes one of the reasons from CUpti_ActivityPCSamplingStallReason

+
+
+
+
+
+
+

6.84. CUpti_ActivityPCSamplingConfig Struct Reference

+

[CUPTI Activity API] +

+
+

This structure defines the pc sampling configuration.

+

See function cuptiActivityConfigurePCSampling

+
+

Public Variables

+
+
CUpti_ActivityPCSamplingPeriod samplingPeriod
+
+
uint32_t  samplingPeriod2
+
+
uint32_t  size
+
+
+
+

Variables

+
+
+ CUpti_ActivityPCSamplingPeriodCUpti_ActivityPCSamplingConfig::samplingPeriod [inherited]
+
+
+

There are 5 levels provided for the sampling period. Each level internally maps to a period in terms of cycles. The same level can + map to a different number of cycles on different GPUs. The number of cycles will be chosen to minimize information loss. The period + chosen will be given by samplingPeriodInCycles in CUpti_ActivityPCSamplingRecordInfo for each kernel instance. +

+
+
+
+ uint32_t CUpti_ActivityPCSamplingConfig::samplingPeriod2 [inherited]
+
+
+

This will override the period set by samplingPeriod. A value of 0 in samplingPeriod2 means that samplingPeriod2 should + not be used and samplingPeriod should be used instead. Valid values for samplingPeriod2 are from 5 to 31, both inclusive. This will + set the sampling period to (2^samplingPeriod2) cycles. +

+
+
+
+ uint32_t CUpti_ActivityPCSamplingConfig::size [inherited]
+
+
+

Size of configuration structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what + fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.85. CUpti_ActivityPCSamplingRecordInfo Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO).

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint64_t  droppedSamples
+
+
CUpti_ActivityKind kind
+
+
uint64_t  samplingPeriodInCycles
+
+
uint64_t  totalSamples
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityPCSamplingRecordInfo::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint64_t CUpti_ActivityPCSamplingRecordInfo::droppedSamples [inherited]
+
+
+

Number of samples that were dropped by hardware due to backpressure/overflow.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityPCSamplingRecordInfo::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO.

+
+
+
+ uint64_t CUpti_ActivityPCSamplingRecordInfo::samplingPeriodInCycles [inherited]
+
+
+

Sampling period in terms of number of cycles.

+
+
+
+ uint64_t CUpti_ActivityPCSamplingRecordInfo::totalSamples [inherited]
+
+
+

Number of times the PC was sampled for this kernel instance including all dropped samples.

+
+
+
+
+
+
+

6.86. CUpti_ActivityPreemption Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a preemption of a CDP kernel.

+
+

Public Variables

+
+
uint32_t  blockX
+
+
uint32_t  blockY
+
+
uint32_t  blockZ
+
+
int64_t  gridId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
CUpti_ActivityPreemptionKind preemptionKind
+
+
uint64_t  timestamp
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityPreemption::blockX [inherited]
+
+
+

The X-dimension of the block that is preempted

+
+
+
+ uint32_t CUpti_ActivityPreemption::blockY [inherited]
+
+
+

The Y-dimension of the block that is preempted

+
+
+
+ uint32_t CUpti_ActivityPreemption::blockZ [inherited]
+
+
+

The Z-dimension of the block that is preempted

+
+
+
+ int64_t CUpti_ActivityPreemption::gridId [inherited]
+
+
+

The grid-id of the block that is preempted

+
+
+
+ CUpti_ActivityKindCUpti_ActivityPreemption::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_PREEMPTION

+
+
+
+ uint32_t CUpti_ActivityPreemption::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ CUpti_ActivityPreemptionKindCUpti_ActivityPreemption::preemptionKind [inherited]
+
+
+

Kind of the preemption.

+
+
+
+ uint64_t CUpti_ActivityPreemption::timestamp [inherited]
+
+
+

The timestamp of the preemption, in ns. A value of 0 indicates that timestamp information could not be collected for the + preemption. +

+
+
+
+
+
+
+

6.87. CUpti_ActivitySharedAccess Struct Reference

+

[CUPTI Activity API] +

+
+

This activity records the locations of the shared accesses in the source (CUPTI_ACTIVITY_KIND_SHARED_ACCESS).

+
+

Public Variables

+
+
uint32_t  correlationId
+
+
uint32_t  executed
+
+
CUpti_ActivityFlag flags
+
+
uint32_t  functionId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  pcOffset
+
+
uint64_t  sharedTransactions
+
+
uint32_t  sourceLocatorId
+
+
uint64_t  theoreticalSharedTransactions
+
+
uint64_t  threadsExecuted
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivitySharedAccess::correlationId [inherited]
+
+
+

The correlation ID of the kernel to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivitySharedAccess::executed [inherited]
+
+
+

The number of times this instruction was executed per warp. It is incremented when at least one thread in the warp + is active with predicate and condition code evaluating to true. +

+
+
+
+ CUpti_ActivityFlagCUpti_ActivitySharedAccess::flags [inherited]
+
+
+

The properties of this shared access.

+
+
+
+ uint32_t CUpti_ActivitySharedAccess::functionId [inherited]
+
+
+

Correlation ID with global/device function name

+
+
+
+ CUpti_ActivityKindCUpti_ActivitySharedAccess::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_SHARED_ACCESS.

+
+
+
+ uint32_t CUpti_ActivitySharedAccess::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivitySharedAccess::pcOffset [inherited]
+
+
+

The pc offset for the access.

+
+
+
+ uint64_t CUpti_ActivitySharedAccess::sharedTransactions [inherited]
+
+
+

The total number of shared memory transactions generated by this access

+
+
+
+ uint32_t CUpti_ActivitySharedAccess::sourceLocatorId [inherited]
+
+
+

The ID for source locator.

+
+
+
+ uint64_t CUpti_ActivitySharedAccess::theoreticalSharedTransactions [inherited]
+
+
+

The minimum number of shared memory transactions possible based on the access pattern.

+
+
+
+ uint64_t CUpti_ActivitySharedAccess::threadsExecuted [inherited]
+
+
+

Each time this instruction is executed, this is incremented by the number of threads that executed the instruction with predicate + and condition code evaluating to true. +

+
+
+
+
+
+
+

6.88. CUpti_ActivitySourceLocator Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a source locator (CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR).

+
+

Public Variables

+
+
const + char + * fileName
+
+
uint32_t  id
+
+
CUpti_ActivityKind kind
+
+
uint32_t  lineNumber
+
+
+
+

Variables

+
+
+ const + + char + * CUpti_ActivitySourceLocator::fileName [inherited]
+
+
+

The path for the file.

+
+
+
+ uint32_t CUpti_ActivitySourceLocator::id [inherited]
+
+
+

The ID for the source path, will be used in all the source level results.

+
+
+
+ CUpti_ActivityKindCUpti_ActivitySourceLocator::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR.

+
+
+
+ uint32_t CUpti_ActivitySourceLocator::lineNumber [inherited]
+
+
+

The line number in the source.

+
+
+
+
+
+
+

6.89. CUpti_ActivityStream Struct Reference

+

[CUPTI Activity API] +

+
+

This activity is used to track created streams. (CUPTI_ACTIVITY_KIND_STREAM).

+
+

Public Variables

+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
CUpti_ActivityStreamFlag flag
+
+
CUpti_ActivityKind kind
+
+
uint32_t  priority
+
+
uint32_t  streamId
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityStream::contextId [inherited]
+
+
+

The ID of the context where the stream was created.

+
+
+
+ uint32_t CUpti_ActivityStream::correlationId [inherited]
+
+
+

The correlation ID of the API to which this result is associated.

+
+
+
+ CUpti_ActivityStreamFlagCUpti_ActivityStream::flag [inherited]
+
+
+

Flags associated with the stream.

+
+
+
+ CUpti_ActivityKindCUpti_ActivityStream::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_STREAM.

+
+
+
+ uint32_t CUpti_ActivityStream::priority [inherited]
+
+
+

The clamped priority for the stream.

+
+
+
+ uint32_t CUpti_ActivityStream::streamId [inherited]
+
+
+

A unique stream ID to identify the stream.

+
+
+
+
+
+
+

6.90. CUpti_ActivitySynchronization Struct Reference

+

[CUPTI Activity API] +

+
+

This activity is used to track various CUDA synchronization APIs. (CUPTI_ACTIVITY_KIND_SYNCHRONIZATION).

+
+

Public Variables

+
+
uint32_t  contextId
+
+
uint32_t  correlationId
+
+
uint32_t  cudaEventId
+
+
uint64_t  end
+
+
CUpti_ActivityKind kind
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
CUpti_ActivitySynchronizationType type
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivitySynchronization::contextId [inherited]
+
+
+

The ID of the context for which the synchronization API is called. In case of context synchronization API it is the context + id for which the API is called. In case of stream/event synchronization it is the ID of the context where the stream/event + was created. +

+
+
+
+ uint32_t CUpti_ActivitySynchronization::correlationId [inherited]
+
+
+

The correlation ID of the API to which this result is associated.

+
+
+
+ uint32_t CUpti_ActivitySynchronization::cudaEventId [inherited]
+
+
+

The event ID for which the synchronization API is called. A CUPTI_SYNCHRONIZATION_INVALID_VALUE value indicates that the field + is not applicable for this record. Not valid for cuCtxSynchronize, cuStreamSynchronize. +

+
+
+
+ uint64_t CUpti_ActivitySynchronization::end [inherited]
+
+
+

The end timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information + could not be collected for the function. +

+
+
+
+ CUpti_ActivityKindCUpti_ActivitySynchronization::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_SYNCHRONIZATION.

+
+
+
+ uint64_t CUpti_ActivitySynchronization::start [inherited]
+
+
+

The start timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp + information could not be collected for the function. +

+
+
+
+ uint32_t CUpti_ActivitySynchronization::streamId [inherited]
+
+
+

The compute stream for which the synchronization API is called. A CUPTI_SYNCHRONIZATION_INVALID_VALUE value indicates that the + field is not applicable for this record. Not valid for cuCtxSynchronize, cuEventSynchronize. +

+
+
+
+ CUpti_ActivitySynchronizationTypeCUpti_ActivitySynchronization::type [inherited]
+
+
+

The type of record.

+
+
+
+
+
+
+

6.91. CUpti_ActivityUnifiedMemoryCounter Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a Unified Memory counter (CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER).

+
+

Public Variables

+
+
CUpti_ActivityUnifiedMemoryCounterKind counterKind
+
+
uint32_t  deviceId
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  processId
+
+
CUpti_ActivityUnifiedMemoryCounterScope scope
+
+
uint64_t  timestamp
+
+
uint64_t  value
+
+
+
+

Variables

+
+
+ CUpti_ActivityUnifiedMemoryCounterKindCUpti_ActivityUnifiedMemoryCounter::counterKind [inherited]
+
+
+

The Unified Memory counter kind. See CUpti_ActivityUnifiedMemoryCounterKind

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter::deviceId [inherited]
+
+
+

The ID of the device involved in the memory transfer operation. It is not relevant if the scope of the counter is global + (all devices). +

+
+
+
+ CUpti_ActivityKindCUpti_ActivityUnifiedMemoryCounter::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter::processId [inherited]
+
+
+

The ID of the process to which this record belongs. In case of global scope, processId is undefined.

+
+
+
+ CUpti_ActivityUnifiedMemoryCounterScopeCUpti_ActivityUnifiedMemoryCounter::scope [inherited]
+
+
+

Scope of the Unified Memory counter. See CUpti_ActivityUnifiedMemoryCounterScope

+
+
+
+ uint64_t CUpti_ActivityUnifiedMemoryCounter::timestamp [inherited]
+
+
+

The timestamp when this sample was retrieved, in ns. A value of 0 indicates that timestamp information could not be collected. + +

+
+
+
+ uint64_t CUpti_ActivityUnifiedMemoryCounter::value [inherited]
+
+
+

Value of the counter

+
+
+
+
+
+
+

6.92. CUpti_ActivityUnifiedMemoryCounter2 Struct Reference

+

[CUPTI Activity API] +

+
+

This activity record represents a Unified Memory counter (CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER).

+
+

Public Variables

+
+
uint64_t  address
+
+
CUpti_ActivityUnifiedMemoryCounterKind counterKind
+
+
uint32_t  dstId
+
+
uint64_t  end
+
+
uint32_t  flags
+
+
CUpti_ActivityKind kind
+
+
uint32_t  pad
+
+
uint32_t  processId
+
+
uint32_t  srcId
+
+
uint64_t  start
+
+
uint32_t  streamId
+
+
uint64_t  value
+
+
+
+

Variables

+
+
+ uint64_t CUpti_ActivityUnifiedMemoryCounter2::address [inherited]
+
+
+

This is the virtual base address of the page/s being transferred. For cpu and gpu faults, the virtual address for the page + that faulted. +

+
+
+
+ CUpti_ActivityUnifiedMemoryCounterKindCUpti_ActivityUnifiedMemoryCounter2::counterKind [inherited]
+
+
+

The Unified Memory counter kind

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter2::dstId [inherited]
+
+
+

The ID of the destination CPU/device involved in the memory transfer or remote map operation. Ignore this field if counterKind + is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT + or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING +

+
+
+
+ uint64_t CUpti_ActivityUnifiedMemoryCounter2::end [inherited]
+
+
+

The end timestamp of the counter, in ns. Ignore this field if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT + or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP. For counterKind + CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH, + timestamp is captured when activity finishes on GPU. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT, + timestamp is captured when CUDA driver queues the replay of faulting memory accesses on the GPU. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING, + timestamp is captured when throttling operation was finished by CUDA driver. +

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter2::flags [inherited]
+
+
+

The flags associated with this record. See enums CUpti_ActivityUnifiedMemoryAccessType if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT and CUpti_ActivityUnifiedMemoryMigrationCause if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH + and CUpti_ActivityUnifiedMemoryRemoteMapCause if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP and CUpti_ActivityFlag if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING. + +

+
+
+
+ CUpti_ActivityKindCUpti_ActivityUnifiedMemoryCounter2::kind [inherited]
+
+
+

The activity record kind, must be CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter2::pad [inherited]
+
+
+

Undefined. Reserved for internal use.

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter2::processId [inherited]
+
+
+

The ID of the process to which this record belongs.

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter2::srcId [inherited]
+
+
+

The ID of the source CPU/device involved in the memory transfer, page fault, thrashing, throttling or remote map operation. + For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING, it is a bitwise ORing of the device IDs fighting for + the memory region. Ignore this field if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT +

+
+
+
+ uint64_t CUpti_ActivityUnifiedMemoryCounter2::start [inherited]
+
+
+

The start timestamp of the counter, in ns. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD + and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH, timestamp is captured when activity starts on GPU. For + counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT, + timestamp is captured when CUDA driver started processing the fault. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING, + timestamp is captured when CUDA driver detected thrashing of memory region. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING, + timestamp is captured when throttling operation was started by CUDA driver. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP, + timestamp is captured when CUDA driver has pushed all required operations to the processor specified by dstId. +

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounter2::streamId [inherited]
+
+
+

The ID of the stream causing the transfer. The value of this field is invalid.

+
+
+
+ uint64_t CUpti_ActivityUnifiedMemoryCounter2::value [inherited]
+
+
+

Value of the counter. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD, CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH, + CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP, it is the + size of the memory region in bytes. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT, it is the number + of page fault groups for the same page. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT, it + is the program counter for the instruction that caused fault. +

+
+
+
+
+
+
+

6.93. CUpti_ActivityUnifiedMemoryCounterConfig Struct Reference

+

[CUPTI Activity API] +

+
+

This structure controls the enable/disable of the various Unified Memory counters consisting of scope, kind and other parameters. + See function cuptiActivityConfigureUnifiedMemoryCounter

+
+

Public Variables

+
+
uint32_t  deviceId
+
+
uint32_t  enable
+
+
CUpti_ActivityUnifiedMemoryCounterKind kind
+
+
CUpti_ActivityUnifiedMemoryCounterScope scope
+
+
+
+

Variables

+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounterConfig::deviceId [inherited]
+
+
+

Device id of the target device. This is relevant only for single device scopes. (deprecated in CUDA 7.0)

+
+
+
+ uint32_t CUpti_ActivityUnifiedMemoryCounterConfig::enable [inherited]
+
+
+

Control to enable/disable the counter. To enable the counter set it to non-zero value while disable is indicated by zero. + +

+
+
+
+ CUpti_ActivityUnifiedMemoryCounterKindCUpti_ActivityUnifiedMemoryCounterConfig::kind [inherited]
+
+
+

Unified Memory counter kind

+
+
+
+ CUpti_ActivityUnifiedMemoryCounterScopeCUpti_ActivityUnifiedMemoryCounterConfig::scope [inherited]
+
+
+

Unified Memory counter scope. (deprecated in CUDA 7.0)

+
+
+
+
+
+
+

6.94. CUpti_CallbackData Struct Reference

+

[CUPTI Callback API] +

+
+

Data passed into a runtime or driver API callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API. The callback data is valid only within the invocation + of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must + make a copy of that data. For example, if you make a shallow copy of CUpti_CallbackData within a callback, you cannot dereference functionParams outside of that callback to access the function parameters. functionName is an exception: the string pointed to by functionName is a global constant and so may be accessed outside of the callback. +

+
+

Public Variables

+
+
CUpti_ApiCallbackSite callbackSite
+
+
CUcontext  context
+
+
uint32_t  contextUid
+
+
+ uint64_t + * correlationData
+
+
uint32_t  correlationId
+
+
const + char + * functionName
+
+
const + void + * functionParams
+
+
+ void + * functionReturnValue
+
+
const + char + * symbolName
+
+
+
+

Variables

+
+
+ CUpti_ApiCallbackSiteCUpti_CallbackData::callbackSite [inherited]
+
+
+

Point in the runtime or driver function from where the callback was issued.

+
+
+
+ CUcontext CUpti_CallbackData::context [inherited]
+
+
+

Driver context current to the thread, or null if no context is current. This value can change from the entry to exit callback + of a runtime API function if the runtime initializes a context. +

+
+
+
+ uint32_t CUpti_CallbackData::contextUid [inherited]
+
+
+

Unique ID for the CUDA context associated with the thread. The UIDs are assigned sequentially as contexts are created and + are unique within a process. +

+
+
+
+ + + uint64_t + * CUpti_CallbackData::correlationData [inherited]
+
+
+

Pointer to data shared between the entry and exit callbacks of a given runtime or driver API function invocation. This field + can be used to pass 64-bit values from the entry callback to the corresponding exit callback. +

+
+
+
+ uint32_t CUpti_CallbackData::correlationId [inherited]
+
+
+

The activity record correlation ID for this callback. For a driver domain callback (i.e. domain CUPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID in the CUpti_ActivityAPI record corresponding to the CUDA driver function call. For a runtime domain callback (i.e. domain CUPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation ID in the CUpti_ActivityAPI record corresponding to the CUDA runtime function call. Within the callback, this ID can be recorded to correlate user data + with the activity record. This field is new in 4.1. +

+
+
+
+ const + + char + * CUpti_CallbackData::functionName [inherited]
+
+
+

Name of the runtime or driver API function which issued the callback. This string is a global constant and so may be accessed + outside of the callback. +

+
+
+
+ const + + void + * CUpti_CallbackData::functionParams [inherited]
+
+
+

Pointer to the arguments passed to the runtime or driver API call. See generated_cuda_runtime_api_meta.h and generated_cuda_meta.h + for structure definitions for the parameters for each runtime and driver API function. +

+
+
+
+ + + void + * CUpti_CallbackData::functionReturnValue [inherited]
+
+
+

Pointer to the return value of the runtime or driver API call. This field is only valid within the exit::CUPTI_API_EXIT callback. For a runtime API functionReturnValue points to a cudaError_t. For a driver API functionReturnValue points to a CUresult. +

+
+
+
+ const + + char + * CUpti_CallbackData::symbolName [inherited]
+
+
+

Name of the symbol operated on by the runtime or driver API function which issued the callback. This entry is valid only + for driver and runtime launch callbacks, where it returns the name of the kernel. +

+
+
+
+
+
+
+

6.95. CUpti_EventGroupSet Struct Reference

+

[CUPTI Event API] +

+
+

A set of event groups. When returned by cuptiEventGroupSetsCreate and cuptiMetricCreateEventGroupSets a set indicates event groups that can be enabled at the same time (i.e. all the events in the set can be collected simultaneously). + +

+
+

Public Variables

+
+
CUpti_EventGroup* + * eventGroups
+
+
uint32_t  numEventGroups
+
+
+
+

Variables

+
+
+ CUpti_EventGroup* + * CUpti_EventGroupSet::eventGroups [inherited]
+
+
+

An array of numEventGroups event groups. +

+
+
+
+ uint32_t CUpti_EventGroupSet::numEventGroups [inherited]
+
+
+

The number of event groups in the set.

+
+
+
+
+
+
+

6.96. CUpti_EventGroupSets Struct Reference

+

[CUPTI Event API] +

+
+

A set of event group sets. When returned by cuptiEventGroupSetsCreate and cuptiMetricCreateEventGroupSets a CUpti_EventGroupSets indicates the number of passes required to collect all the events, and the event groups that should be collected during each + pass. +

+
+

Public Variables

+
+
uint32_t  numSets
+
+
CUpti_EventGroupSet + * sets
+
+
+
+

Variables

+
+
+ uint32_t CUpti_EventGroupSets::numSets [inherited]
+
+
+

Number of event group sets.

+
+
+
+ CUpti_EventGroupSet + * CUpti_EventGroupSets::sets [inherited]
+
+
+

An array of numSets event group sets. +

+
+
+
+
+
+
+

6.97. CUpti_GetCubinCrcParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
const + void + * cubin
+
+
uint64_t  cubinCrc
+
+
size_t  cubinSize
+
+
size_t  size
+
+
+
+

Variables

+
+
+ const + + void + * CUpti_GetCubinCrcParams::cubin [inherited]
+
+
+

[w] Pointer to cubin binary

+
+
+
+ uint64_t CUpti_GetCubinCrcParams::cubinCrc [inherited]
+
+
+

[r] Computed CRC will be stored in it.

+
+
+
+ size_t CUpti_GetCubinCrcParams::cubinSize [inherited]
+
+
+

[w] Size of cubin binary.

+
+
+
+ size_t CUpti_GetCubinCrcParams::size [inherited]
+
+
+

[w] Size of configuration structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check + what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.98. CUpti_GetSassToSourceCorrelationParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
const + void + * cubin
+
+
size_t  cubinSize
+
+
+ char + * dirName
+
+
+ char + * fileName
+
+
const + char + * functionName
+
+
uint32_t  lineNumber
+
+
uint64_t  pcOffset
+
+
size_t  size
+
+
+
+

Variables

+
+
+ const + + void + * CUpti_GetSassToSourceCorrelationParams::cubin [inherited]
+
+
+

[w] Pointer to cubin binary where function belongs.

+
+
+
+ size_t CUpti_GetSassToSourceCorrelationParams::cubinSize [inherited]
+
+
+

[w] Size of cubin binary.

+
+
+
+ + + char + * CUpti_GetSassToSourceCorrelationParams::dirName [inherited]
+
+
+

[r] Path for the directory of source file.

+
+
+
+ + + char + * CUpti_GetSassToSourceCorrelationParams::fileName [inherited]
+
+
+

[r] Path for the source file.

+
+
+
+ const + + char + * CUpti_GetSassToSourceCorrelationParams::functionName [inherited]
+
+
+

[w] Function name to which PC belongs.

+
+
+
+ uint32_t CUpti_GetSassToSourceCorrelationParams::lineNumber [inherited]
+
+
+

[r] Line number in the source code.

+
+
+
+ uint64_t CUpti_GetSassToSourceCorrelationParams::pcOffset [inherited]
+
+
+

[w] PC offset

+
+
+
+ size_t CUpti_GetSassToSourceCorrelationParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_GetSassToSourceCorrelationParamsSize. CUPTI client should set the size of the structure. + It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.99. CUpti_GraphData Struct Reference

+

[CUPTI Callback API] +

+
+

CUDA graphs data passed into a resource callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_RESOURCE. The graph data is valid only within the invocation of the callback function that is passed + the data. If you need to retain some data for use outside of the callback, you must make a copy of that data. +

+
+

Public Variables

+
+
CUgraphNode  dependency
+
+
CUgraph  graph
+
+
CUgraphExec  graphExec
+
+
CUgraphNode  node
+
+
CUgraphNodeType  nodeType
+
+
CUgraph  originalGraph
+
+
CUgraphNode  originalNode
+
+
+
+

Variables

+
+
+ CUgraphNode CUpti_GraphData::dependency [inherited]
+
+
+

The dependent graph node. The size of the array is numDependencies. + +

+
+
+
+ CUgraph CUpti_GraphData::graph [inherited]
+
+
+

CUDA graph

+
+
+
+ CUgraphExec CUpti_GraphData::graphExec [inherited]
+
+
+

CUDA executable graph

+
+
+
+ CUgraphNode CUpti_GraphData::node [inherited]
+
+
+

CUDA graph node

+
+
+
+ CUgraphNodeType CUpti_GraphData::nodeType [inherited]
+
+
+

Type of the node. + +

+
+
+
+ CUgraph CUpti_GraphData::originalGraph [inherited]
+
+
+

The original CUDA graph from which graph is cloned. + +

+
+
+
+ CUgraphNode CUpti_GraphData::originalNode [inherited]
+
+
+

The original CUDA graph node from which node is cloned. + +

+
+
+
+
+
+
+

6.100. CUpti_MetricValue Union Reference

+

[CUPTI Metric API] +

+
+

Metric values can be one of several different kinds. Corresponding to each kind is a member of the CUpti_MetricValue union. The metric value returned by cuptiMetricGetValue should be accessed using the appropriate member of that union based on its value kind. +

+
+
+
+

6.101. CUpti_ModuleResourceData Struct Reference

+

[CUPTI Callback API] +

+
+

CUDA module data passed into a resource callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_RESOURCE. The module data is valid only within the invocation of the callback function that is passed + the data. If you need to retain some data for use outside of the callback, you must make a copy of that data. +

+
+

Public Variables

+
+
size_t  cubinSize
+
+
uint32_t  moduleId
+
+
const + char + * pCubin
+
+
+
+

Variables

+
+
+ size_t CUpti_ModuleResourceData::cubinSize [inherited]
+
+
+

The size of the cubin.

+
+
+
+ uint32_t CUpti_ModuleResourceData::moduleId [inherited]
+
+
+

Identifier to associate with the CUDA module.

+
+
+
+ const + + char + * CUpti_ModuleResourceData::pCubin [inherited]
+
+
+

Pointer to the associated cubin.

+
+
+
+
+
+
+

6.102. CUpti_NvtxData Struct Reference

+

[CUPTI Callback API] +

+
+

Data passed into a NVTX callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_NVTX. Unless otherwise noted, the callback data is valid only within the invocation of the callback + function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of + that data. +

+
+

Public Variables

+
+
const + char + * functionName
+
+
const + void + * functionParams
+
+
const + void + * functionReturnValue
+
+
+
+

Variables

+
+
+ const + + char + * CUpti_NvtxData::functionName [inherited]
+
+
+

Name of the NVTX API function which issued the callback. This string is a global constant and so may be accessed outside + of the callback. +

+
+
+
+ const + + void + * CUpti_NvtxData::functionParams [inherited]
+
+
+

Pointer to the arguments passed to the NVTX API call. See generated_nvtx_meta.h for structure definitions for the parameters + for each NVTX API function. +

+
+
+
+ const + + void + * CUpti_NvtxData::functionReturnValue [inherited]
+
+
+

Pointer to the return value of the NVTX API call. See nvToolsExt.h for each NVTX API function's return value.

+
+
+
+
+
+
+

6.103. CUpti_PCSamplingConfigurationInfo Struct Reference

+

[CUPTI PC Sampling API] +

+
+

This structure provides CUpti_PCSamplingConfigurationAttributeType which can be configured or queried for PC sampling configuration. +

+
+

Public Variables

+
+
CUpti_PCSamplingConfigurationAttributeType attributeType
+
+
CUpti_PCSamplingConfigurationInfo::@49::@55  collectionModeData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@56  enableStartStopControlData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@54  hardwareBufferSizeData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@50  invalidData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@57  outputDataFormatData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@58  samplingDataBufferData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@51  samplingPeriodData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@53  scratchBufferSizeData
+
+
CUpti_PCSamplingConfigurationInfo::@49::@52  stallReasonData
+
+
+
+

Variables

+
+
+ CUpti_PCSamplingConfigurationAttributeType CUpti_PCSamplingConfigurationInfo::attributeType [inherited]
+
+
+

Refer to CUpti_PCSamplingConfigurationAttributeType for all supported attribute types. +

+
+
+
+ CUpti_PCSamplingConfigurationInfo::@49::@55 CUpti_PCSamplingConfigurationInfo::collectionModeData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@56 CUpti_PCSamplingConfigurationInfo::enableStartStopControlData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@54 CUpti_PCSamplingConfigurationInfo::hardwareBufferSizeData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@50 CUpti_PCSamplingConfigurationInfo::invalidData [inherited]
+
+
+

Invalid Value

+
+
+
+ CUpti_PCSamplingConfigurationInfo::@49::@57 CUpti_PCSamplingConfigurationInfo::outputDataFormatData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@58 CUpti_PCSamplingConfigurationInfo::samplingDataBufferData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@51 CUpti_PCSamplingConfigurationInfo::samplingPeriodData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@53 CUpti_PCSamplingConfigurationInfo::scratchBufferSizeData [inherited]
+
+ +
+
+ CUpti_PCSamplingConfigurationInfo::@49::@52 CUpti_PCSamplingConfigurationInfo::stallReasonData [inherited]
+
+ +
+
+
+
+
+

6.104. CUpti_PCSamplingConfigurationInfoParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

This structure configures PC sampling using cuptiPCSamplingSetConfigurationAttribute and queries PC sampling default configuration using cuptiPCSamplingGetConfigurationAttribute

+
+

Public Variables

+
+
CUcontext  ctx
+
+
size_t  numAttributes
+
+
CUpti_PCSamplingConfigurationInfo + * pPCSamplingConfigurationInfo
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingConfigurationInfoParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ size_t CUpti_PCSamplingConfigurationInfoParams::numAttributes [inherited]
+
+
+

[w] Number of attributes to configure using cuptiPCSamplingSetConfigurationAttribute or query using cuptiPCSamplingGetConfigurationAttribute

+
+
+
+ CUpti_PCSamplingConfigurationInfo + * CUpti_PCSamplingConfigurationInfoParams::pPCSamplingConfigurationInfo [inherited]
+
+ +
+
+ + + void + * CUpti_PCSamplingConfigurationInfoParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingConfigurationInfoParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingConfigurationInfoParamsSize. CUPTI client should set the size of the structure. + It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.105. CUpti_PCSamplingData Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
size_t  collectNumPcs
+
+
uint64_t  droppedSamples
+
+
uint64_t  nonUsrKernelsTotalSamples
+
+
CUpti_PCSamplingPCData + * pPcData
+
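[r] Profiled PC data. This data struct should have enough memory to collect the number of PCs mentioned in collectNumPcs.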
+
uint64_t  rangeId
+
+
size_t  remainingNumPcs
+
+
size_t  size
+
+
size_t  totalNumPcs
+
+
uint64_t  totalSamples
+
+
+
+

Variables

+
+
+ size_t CUpti_PCSamplingData::collectNumPcs [inherited]
+
+
+

[w] Number of PCs to be collected

+
+
+
+ uint64_t CUpti_PCSamplingData::droppedSamples [inherited]
+
+
+

[r] Number of samples that were dropped by hardware due to backpressure/overflow.

+
+
+
+ uint64_t CUpti_PCSamplingData::nonUsrKernelsTotalSamples [inherited]
+
+
+

[r] Number of samples collected across all non-user kernel PCs. It includes samples for non-user kernels. It includes counts + for all non-selected stall reasons as well. CUPTI does not provide PC records for non-user kernels. +

+
+
+
+ CUpti_PCSamplingPCData + * CUpti_PCSamplingData::pPcData [inherited]
+
+
+

[r] Profiled PC data. This data struct should have enough memory to collect the number of PCs mentioned in collectNumPcs.

+
+
+
+ uint64_t CUpti_PCSamplingData::rangeId [inherited]
+
+
+

[r] Unique identifier for each range. Data collected across multiple ranges in multiple buffers can be identified using range + id. +

+
+
+
+ size_t CUpti_PCSamplingData::remainingNumPcs [inherited]
+
+
+

[r] Number of PCs available for collection

+
+
+
+ size_t CUpti_PCSamplingData::size [inherited]
+
+
+

[w] Size of the data structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what + fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+ size_t CUpti_PCSamplingData::totalNumPcs [inherited]
+
+
+

[r] Number of PCs collected

+
+
+
+ uint64_t CUpti_PCSamplingData::totalSamples [inherited]
+
+
+

[r] Number of samples collected across all PCs. It includes samples for user modules, samples for non-user kernels and dropped + samples. It includes counts for all non selected stall reasons. CUPTI does not provide PC records for non-user kernels. CUPTI + does not provide PC records for instructions for which all selected stall reason metrics counts are zero. +

+
+
+
+
+
+
+

6.106. CUpti_PCSamplingDisableParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingDisableParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ + + void + * CUpti_PCSamplingDisableParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingDisableParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. CUPTI client should set the size of the structure. + It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.107. CUpti_PCSamplingEnableParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingEnableParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ + + void + * CUpti_PCSamplingEnableParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingEnableParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingEnableParamsSize. CUPTI client should set the size of the structure. It + will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.108. CUpti_PCSamplingGetDataParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
+ void + * pPriv
+
+
+ void + * pcSamplingData
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingGetDataParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ + + void + * CUpti_PCSamplingGetDataParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ + + void + * CUpti_PCSamplingGetDataParams::pcSamplingData [inherited]
+
+
+

+
+
+
+ size_t CUpti_PCSamplingGetDataParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingGetDataParamsSize. CUPTI client should set the size of the structure. + It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.109. CUpti_PCSamplingGetNumStallReasonsParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
+ size_t + * numStallReasons
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingGetNumStallReasonsParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ + + size_t + * CUpti_PCSamplingGetNumStallReasonsParams::numStallReasons [inherited]
+
+
+

[r] Number of stall reasons

+
+
+
+ + + void + * CUpti_PCSamplingGetNumStallReasonsParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingGetNumStallReasonsParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize. CUPTI client should set the size of the + structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. + +

+
+
+
+
+
+
+

6.110. CUpti_PCSamplingGetStallReasonsParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
size_t  numStallReasons
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+ uint32_t + * stallReasonIndex
+
+
+ char + ** stallReasons
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingGetStallReasonsParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ size_t CUpti_PCSamplingGetStallReasonsParams::numStallReasons [inherited]
+
+
+

[w] Number of stall reasons

+
+
+
+ + + void + * CUpti_PCSamplingGetStallReasonsParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingGetStallReasonsParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingGetStallReasonsParamsSize. CUPTI client should set the size of the structure. + It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+ + + uint32_t + * CUpti_PCSamplingGetStallReasonsParams::stallReasonIndex [inherited]
+
+
+

[r] Stall reason index

+
+
+
+ + + char + ** CUpti_PCSamplingGetStallReasonsParams::stallReasons [inherited]
+
+
+

[r] Stall reasons name

+
+
+
+
+
+
+

6.111. CUpti_PCSamplingPCData Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
uint64_t  cubinCrc
+
+
uint32_t  functionIndex
+
+
+ char + * functionName
+
+
uint32_t  pad
+
+
uint64_t  pcOffset
+
+
size_t  size
+
+
CUpti_PCSamplingStallReason + * stallReason
+
+
size_t  stallReasonCount
+
+
+
+

Variables

+
+
+ uint64_t CUpti_PCSamplingPCData::cubinCrc [inherited]
+
+
+

[r] Unique cubin id

+
+
+
+ uint32_t CUpti_PCSamplingPCData::functionIndex [inherited]
+
+
+

The function's unique symbol index in the module.

+
+
+
+ + + char + * CUpti_PCSamplingPCData::functionName [inherited]
+
+
+

[r] The function name. This name string might be shared across all the records including records from activity APIs representing + the same function, and so it should not be modified or freed until post processing of all the records is done. Once done, + it is user’s responsibility to free the memory using free() function. +

+
+
+
+ uint32_t CUpti_PCSamplingPCData::pad [inherited]
+
+
+

Padding

+
+
+
+ uint64_t CUpti_PCSamplingPCData::pcOffset [inherited]
+
+
+

[r] PC offset

+
+
+
+ size_t CUpti_PCSamplingPCData::size [inherited]
+
+
+

[w] Size of the data structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what + fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+ CUpti_PCSamplingStallReason + * CUpti_PCSamplingPCData::stallReason [inherited]
+
+
+

[r] Stall reason id and total samples

+
+
+
+ size_t CUpti_PCSamplingPCData::stallReasonCount [inherited]
+
+
+

[r] Collected stall reason count

+
+
+
+
+
+
+

6.112. CUpti_PCSamplingStallReason Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
uint32_t  pcSamplingStallReasonIndex
+
+
uint32_t  samples
+
+
+
+

Variables

+
+
+ uint32_t CUpti_PCSamplingStallReason::pcSamplingStallReasonIndex [inherited]
+
+
+

[r] Collected stall reason index

+
+
+
+ uint32_t CUpti_PCSamplingStallReason::samples [inherited]
+
+
+

[r] Number of times the PC was sampled with the stallReason.

+
+
+
+
+
+
+

6.113. CUpti_PCSamplingStartParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingStartParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ + + void + * CUpti_PCSamplingStartParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingStartParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingStartParamsSize. CUPTI client should set the size of the structure. It + will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.114. CUpti_PCSamplingStopParams Struct Reference

+

[CUPTI PC Sampling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
+
+ void + * pPriv
+
+
size_t  size
+
+
+
+

Variables

+
+
+ CUcontext CUpti_PCSamplingStopParams::ctx [inherited]
+
+
+

[w] CUcontext

+
+
+
+ + + void + * CUpti_PCSamplingStopParams::pPriv [inherited]
+
+
+

[w] Assign to NULL

+
+
+
+ size_t CUpti_PCSamplingStopParams::size [inherited]
+
+
+

[w] Size of the data structure, i.e. CUpti_PCSamplingStopParamsSize. CUPTI client should set the size of the structure. It + will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+
+
+
+
+
+
+

6.115. CUpti_Profiler_BeginPass_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ CUcontext CUpti_Profiler_BeginPass_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + void + * CUpti_Profiler_BeginPass_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_BeginPass_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.116. CUpti_Profiler_BeginSession_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
uint8_t  bDumpCounterDataInFile
+
[in] [optional]
+
size_t  counterDataImageSize
+
[in] size calculated from cuptiProfilerCounterDataImageCalculateSize
+
size_t  counterDataScratchBufferSize
+
[in] size calculated from cuptiProfilerCounterDataImageInitializeScratchBuffer
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
size_t  maxLaunchesPerPass
+
[in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass.
+
size_t  maxRangesPerPass
+
[in] Maximum number of ranges that can be recorded in a single pass.
+
const + char + * pCounterDataFilePath
+
[in] [optional]
+
+ uint8_t + * pCounterDataImage
+
[in] address of CounterDataImage
+
+ uint8_t + * pCounterDataScratchBuffer
+
[in] address of CounterDataImage scratch buffer
+
+ void + * pPriv
+
[in] assign to NULL
+
CUpti_ProfilerRange range
+
[in] CUpti_ProfilerRange
+
CUpti_ProfilerReplayMode replayMode
+
[in] CUpti_ProfilerReplayMode
+
size_t  structSize
+
[in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ uint8_t CUpti_Profiler_BeginSession_Params::bDumpCounterDataInFile [inherited]
+
+
+

[in] [optional]

+
+
+
+ size_t CUpti_Profiler_BeginSession_Params::counterDataImageSize [inherited]
+
+
+

[in] size calculated from cuptiProfilerCounterDataImageCalculateSize

+
+
+
+ size_t CUpti_Profiler_BeginSession_Params::counterDataScratchBufferSize [inherited]
+
+
+

[in] size calculated from cuptiProfilerCounterDataImageInitializeScratchBuffer

+
+
+
+ CUcontext CUpti_Profiler_BeginSession_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ size_t CUpti_Profiler_BeginSession_Params::maxLaunchesPerPass [inherited]
+
+
+

[in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass.

+
+
+
+ size_t CUpti_Profiler_BeginSession_Params::maxRangesPerPass [inherited]
+
+
+

[in] Maximum number of ranges that can be recorded in a single pass.

+
+
+
+ const + + char + * CUpti_Profiler_BeginSession_Params::pCounterDataFilePath [inherited]
+
+
+

[in] [optional]

+
+
+
+ + + uint8_t + * CUpti_Profiler_BeginSession_Params::pCounterDataImage [inherited]
+
+
+

[in] address of CounterDataImage

+
+
+
+ + + uint8_t + * CUpti_Profiler_BeginSession_Params::pCounterDataScratchBuffer [inherited]
+
+
+

[in] address of CounterDataImage scratch buffer

+
+
+
+ + + void + * CUpti_Profiler_BeginSession_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ CUpti_ProfilerRange CUpti_Profiler_BeginSession_Params::range [inherited]
+
+
+

[in] CUpti_ProfilerRange

+
+
+
+ CUpti_ProfilerReplayMode CUpti_Profiler_BeginSession_Params::replayMode [inherited]
+
+
+

[in] CUpti_ProfilerReplayMode

+
+
+
+ size_t CUpti_Profiler_BeginSession_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.117. CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  counterDataImageSize
+
[in] size calculated from cuptiProfilerCounterDataImageCalculateSize
+
size_t  counterDataScratchBufferSize
+
[out]
+
+ uint8_t + * pCounterDataImage
+
[in]
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params::counterDataImageSize [inherited]
+
+
+

[in] size calculated from cuptiProfilerCounterDataImageCalculateSize

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params::counterDataScratchBufferSize [inherited]
+
+
+

[out]

+
+
+
+ + + uint8_t + * CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params::pCounterDataImage [inherited]
+
+
+

[in]

+
+
+
+ + + void + * CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.118. CUpti_Profiler_CounterDataImage_CalculateSize_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  counterDataImageSize
+
[out]
+
const + CUpti_Profiler_CounterDataImageOptions + * pOptions
+
[in] Pointer to Counter Data Image Options
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  sizeofCounterDataImageOptions
+
[in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
+
size_t  structSize
+
[in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_CounterDataImage_CalculateSize_Params::counterDataImageSize [inherited]
+
+
+

[out]

+
+
+
+ const + + CUpti_Profiler_CounterDataImageOptions + * CUpti_Profiler_CounterDataImage_CalculateSize_Params::pOptions [inherited]
+
+
+

[in] Pointer to Counter Data Image Options

+
+
+
+ + + void + * CUpti_Profiler_CounterDataImage_CalculateSize_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_CalculateSize_Params::sizeofCounterDataImageOptions [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_CalculateSize_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.119. CUpti_Profiler_CounterDataImage_Initialize_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  counterDataImageSize
+
[in] Size calculated from cuptiProfilerCounterDataImageCalculateSize
+
+ uint8_t + * pCounterDataImage
+
[in] The buffer to be initialized.
+
const + CUpti_Profiler_CounterDataImageOptions + * pOptions
+
[in] Pointer to Counter Data Image Options
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  sizeofCounterDataImageOptions
+
[in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
+
size_t  structSize
+
[in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_CounterDataImage_Initialize_Params::counterDataImageSize [inherited]
+
+
+

[in] Size calculated from cuptiProfilerCounterDataImageCalculateSize

+
+
+
+ + + uint8_t + * CUpti_Profiler_CounterDataImage_Initialize_Params::pCounterDataImage [inherited]
+
+
+

[in] The buffer to be initialized.

+
+
+
+ const + + CUpti_Profiler_CounterDataImageOptions + * CUpti_Profiler_CounterDataImage_Initialize_Params::pOptions [inherited]
+
+
+

[in] Pointer to Counter Data Image Options

+
+
+
+ + + void + * CUpti_Profiler_CounterDataImage_Initialize_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_Initialize_Params::sizeofCounterDataImageOptions [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_Initialize_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.120. CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  counterDataImageSize
+
[in] size calculated from cuptiProfilerCounterDataImageCalculateSize
+
size_t  counterDataScratchBufferSize
+
[in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize
+
+ uint8_t + * pCounterDataImage
+
[in]
+
+ uint8_t + * pCounterDataScratchBuffer
+
[in] the scratch buffer to be initialized.
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params::counterDataImageSize [inherited]
+
+
+

[in] size calculated from cuptiProfilerCounterDataImageCalculateSize

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params::counterDataScratchBufferSize [inherited]
+
+
+

[in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize

+
+
+
+ + + uint8_t + * CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params::pCounterDataImage [inherited]
+
+
+

[in]

+
+
+
+ + + uint8_t + * CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params::pCounterDataScratchBuffer [inherited]
+
+
+

[in] the scratch buffer to be initialized.

+
+
+
+ + + void + * CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.121. CUpti_Profiler_CounterDataImageOptions Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  counterDataPrefixSize
+
[in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
+
uint32_t  maxNumRangeTreeNodes
+
[in] Maximum number of RangeTree nodes; must be >= maxNumRanges
+
uint32_t  maxNumRanges
+
[in] Maximum number of ranges that can be profiled
+
uint32_t  maxRangeNameLength
+
[in] Maximum string length of each RangeName, including the trailing NULL character
+
const + uint8_t + * pCounterDataPrefix
+
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_CounterDataImageOptions::counterDataPrefixSize [inherited]
+
+
+

[in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().

+
+
+
+ uint32_t CUpti_Profiler_CounterDataImageOptions::maxNumRangeTreeNodes [inherited]
+
+
+

[in] Maximum number of RangeTree nodes; must be >= maxNumRanges

+
+
+
+ uint32_t CUpti_Profiler_CounterDataImageOptions::maxNumRanges [inherited]
+
+
+

[in] Maximum number of ranges that can be profiled

+
+
+
+ uint32_t CUpti_Profiler_CounterDataImageOptions::maxRangeNameLength [inherited]
+
+
+

[in] Maximum string length of each RangeName, including the trailing NULL character

+
+
+
+ const + + uint8_t + * CUpti_Profiler_CounterDataImageOptions::pCounterDataPrefix [inherited]
+
+
+

[in] Address of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).

+
+
+
+ + + void + * CUpti_Profiler_CounterDataImageOptions::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_CounterDataImageOptions::structSize [inherited]
+
+
+

[in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE

+
+
+
+
+
+
+

6.122. CUpti_Profiler_DeInitialize_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ + + void + * CUpti_Profiler_DeInitialize_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_DeInitialize_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.123. CUpti_Profiler_DeviceSupported_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUpti_Profiler_Support_Level architecture
+
[out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise +
+
CUpti_Profiler_Support_Level cmp
+
[out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise
+
CUpti_Profiler_Support_Level confidentialCompute
+
[out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise
+
CUdevice  cuDevice
+
[in] if NULL, the current CUcontext is used
+
CUpti_Profiler_Support_Level isSupported
+
[out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and + configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise.
+
+ void + * pPriv
+
[in] assign to NULL
+
CUpti_Profiler_Support_Level sli
+
[out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise
+
size_t  structSize
+
[in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE
+
CUpti_Profiler_Support_Level vGpu
+
[out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED + otherwise
+
+
+

Variables

+
+
+ CUpti_Profiler_Support_Level CUpti_Profiler_DeviceSupported_Params::architecture [inherited]
+
+
+

[out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise + +

+
+
+
+ CUpti_Profiler_Support_Level CUpti_Profiler_DeviceSupported_Params::cmp [inherited]
+
+
+

[out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise

+
+
+
+ CUpti_Profiler_Support_Level CUpti_Profiler_DeviceSupported_Params::confidentialCompute [inherited]
+
+
+

[out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise

+
+
+
+ CUdevice CUpti_Profiler_DeviceSupported_Params::cuDevice [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ CUpti_Profiler_Support_Level CUpti_Profiler_DeviceSupported_Params::isSupported [inherited]
+
+
+

[out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and + configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise. +

+
+
+
+ + + void + * CUpti_Profiler_DeviceSupported_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ CUpti_Profiler_Support_Level CUpti_Profiler_DeviceSupported_Params::sli [inherited]
+
+
+

[out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise

+
+
+
+ size_t CUpti_Profiler_DeviceSupported_Params::structSize [inherited]
+
+
+

[in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE

+
+
+
+ CUpti_Profiler_Support_Level CUpti_Profiler_DeviceSupported_Params::vGpu [inherited]
+
+
+

[out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED + otherwise +

+
+
+
+
+
+
+

6.124. CUpti_Profiler_DisableProfiling_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ CUcontext CUpti_Profiler_DisableProfiling_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + void + * CUpti_Profiler_DisableProfiling_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_DisableProfiling_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.125. CUpti_Profiler_EnableProfiling_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ CUcontext CUpti_Profiler_EnableProfiling_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + void + * CUpti_Profiler_EnableProfiling_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_EnableProfiling_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.126. CUpti_Profiler_EndPass_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
uint8_t  allPassesSubmitted
+
[out] becomes true when the last pass has been queued to the GPU
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  passIndex
+
[out] The passIndex that will be collected by the *next* BeginPass.
+
size_t  structSize
+
[in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ uint8_t CUpti_Profiler_EndPass_Params::allPassesSubmitted [inherited]
+
+
+

[out] becomes true when the last pass has been queued to the GPU

+
+
+
+ CUcontext CUpti_Profiler_EndPass_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + void + * CUpti_Profiler_EndPass_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_EndPass_Params::passIndex [inherited]
+
+
+

[out] The passIndex that will be collected by the *next* BeginPass. (The text "[out] The targetNestingLevel that will be collected by the *next* BeginPass." describes the separate targetNestingLevel member.) +

+
+
+
+ size_t CUpti_Profiler_EndPass_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.127. CUpti_Profiler_EndSession_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ CUcontext CUpti_Profiler_EndSession_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + void + * CUpti_Profiler_EndSession_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_EndSession_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.128. CUpti_Profiler_FlushCounterData_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
size_t  numRangesDropped
+
[out] number of ranges whose data was dropped in the processed passes
+
size_t  numTraceBytesDropped
+
[out] number of bytes not written to TraceBuffer due to buffer full
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ CUcontext CUpti_Profiler_FlushCounterData_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ size_t CUpti_Profiler_FlushCounterData_Params::numRangesDropped [inherited]
+
+
+

[out] number of ranges whose data was dropped in the processed passes

+
+
+
+ size_t CUpti_Profiler_FlushCounterData_Params::numTraceBytesDropped [inherited]
+
+
+

[out] number of bytes not written to TraceBuffer due to buffer full

+
+
+
+ + + void + * CUpti_Profiler_FlushCounterData_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_FlushCounterData_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.129. CUpti_Profiler_GetCounterAvailability_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  counterAvailabilityImageSize
+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ uint8_t + * pCounterAvailabilityImage
+
[in] buffer receiving counter availability image, may be NULL
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_GetCounterAvailability_Params::counterAvailabilityImageSize [inherited]
+
+
+

[in/out] If `pCounterAvailabilityImage` is NULL, then the required size is returned in `counterAvailabilityImageSize`, otherwise + `counterAvailabilityImageSize` should be set to the size of `pCounterAvailabilityImage`, and on return it would be overwritten + with number of actual bytes copied +

+
+
+
+ CUcontext CUpti_Profiler_GetCounterAvailability_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + uint8_t + * CUpti_Profiler_GetCounterAvailability_Params::pCounterAvailabilityImage [inherited]
+
+
+

[in] buffer receiving counter availability image, may be NULL

+
+
+
+ + + void + * CUpti_Profiler_GetCounterAvailability_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_GetCounterAvailability_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.130. CUpti_Profiler_Initialize_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ + + void + * CUpti_Profiler_Initialize_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_Initialize_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.131. CUpti_Profiler_IsPassCollected_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
uint8_t  allPassesCollected
+
[out] becomes true when the last pass has been decoded
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
size_t  numRangesDropped
+
[out] number of ranges whose data was dropped in the processed pass
+
size_t  numTraceBytesDropped
+
[out] number of bytes not written to TraceBuffer due to buffer full
+
uint8_t  onePassCollected
+
[out] true if a pass was successfully decoded
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ uint8_t CUpti_Profiler_IsPassCollected_Params::allPassesCollected [inherited]
+
+
+

[out] becomes true when the last pass has been decoded

+
+
+
+ CUcontext CUpti_Profiler_IsPassCollected_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ size_t CUpti_Profiler_IsPassCollected_Params::numRangesDropped [inherited]
+
+
+

[out] number of ranges whose data was dropped in the processed pass

+
+
+
+ size_t CUpti_Profiler_IsPassCollected_Params::numTraceBytesDropped [inherited]
+
+
+

[out] number of bytes not written to TraceBuffer due to buffer full

+
+
+
+ uint8_t CUpti_Profiler_IsPassCollected_Params::onePassCollected [inherited]
+
+
+

[out] true if a pass was successfully decoded

+
+
+
+ + + void + * CUpti_Profiler_IsPassCollected_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_IsPassCollected_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.132. CUpti_Profiler_SetConfig_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
size_t  configSize
+
[in] size of config
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
uint16_t  minNestingLevel
+
[in] the lowest nesting level to be profiled; must be >= 1
+
uint16_t  numNestingLevels
+
[in] the number of nesting levels to profile; must be >= 1
+
const + uint8_t + * pConfig
+
[in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be 8-byte aligned.
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  passIndex
+
[in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay
+
size_t  structSize
+
[in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE
+
uint16_t  targetNestingLevel
+
[in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application replay
+
+
+

Variables

+
+
+ size_t CUpti_Profiler_SetConfig_Params::configSize [inherited]
+
+
+

[in] size of config

+
+
+
+ CUcontext CUpti_Profiler_SetConfig_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ uint16_t CUpti_Profiler_SetConfig_Params::minNestingLevel [inherited]
+
+
+

[in] the lowest nesting level to be profiled; must be >= 1

+
+
+
+ uint16_t CUpti_Profiler_SetConfig_Params::numNestingLevels [inherited]
+
+
+

[in] the number of nesting levels to profile; must be >= 1

+
+
+
+ const + + uint8_t + * CUpti_Profiler_SetConfig_Params::pConfig [inherited]
+
+
+

[in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be 8-byte aligned.

+
+
+
+ + + void + * CUpti_Profiler_SetConfig_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_SetConfig_Params::passIndex [inherited]
+
+
+

[in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay

+
+
+
+ size_t CUpti_Profiler_SetConfig_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE

+
+
+
+ uint16_t CUpti_Profiler_SetConfig_Params::targetNestingLevel [inherited]
+
+
+

[in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application replay

+
+
+
+
+
+
+

6.133. CUpti_Profiler_UnsetConfig_Params Struct Reference

+

[CUPTI Profiling API] +

+
+

+
+

Public Variables

+
+
CUcontext  ctx
+
[in] if NULL, the current CUcontext is used
+
+ void + * pPriv
+
[in] assign to NULL
+
size_t  structSize
+
[in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE
+
+
+

Variables

+
+
+ CUcontext CUpti_Profiler_UnsetConfig_Params::ctx [inherited]
+
+
+

[in] if NULL, the current CUcontext is used

+
+
+
+ + + void + * CUpti_Profiler_UnsetConfig_Params::pPriv [inherited]
+
+
+

[in] assign to NULL

+
+
+
+ size_t CUpti_Profiler_UnsetConfig_Params::structSize [inherited]
+
+
+

[in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE

+
+
+
+
+
+
+

6.134. CUpti_ResourceData Struct Reference

+

[CUPTI Callback API] +

+
+

Data passed into a resource callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_RESOURCE. The callback data is valid only within the invocation of the callback function that is + passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data. +

+
+

Public Variables

+
+
CUcontext  context
+
+
+ void + * resourceDescriptor
+
+
CUstream  stream
+
+
+
+

Variables

+
+
+ CUcontext CUpti_ResourceData::context [inherited]
+
+
+

For CUPTI_CBID_RESOURCE_CONTEXT_CREATED and CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, the context being created or destroyed. + For CUPTI_CBID_RESOURCE_STREAM_CREATED and CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the context containing the stream + being created or destroyed. +

+
+
+
+ + + void + * CUpti_ResourceData::resourceDescriptor [inherited]
+
+
+

Reserved for future use.

+
+
+
+ CUstream CUpti_ResourceData::stream [inherited]
+
+
+

For CUPTI_CBID_RESOURCE_STREAM_CREATED and CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the stream being created or destroyed. + +

+
+
+
+
+
+
+

6.135. CUpti_SynchronizeData Struct Reference

+

[CUPTI Callback API] +

+
+

Data passed into a synchronize callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_SYNCHRONIZE. The callback data is valid only within the invocation of the callback function that + is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data. +

+
+

Public Variables

+
+
CUcontext  context
+
+
CUstream  stream
+
+
+
+

Variables

+
+
+ CUcontext CUpti_SynchronizeData::context [inherited]
+
+
+

The context of the stream being synchronized.

+
+
+
+ CUstream CUpti_SynchronizeData::stream [inherited]
+
+
+

The stream being synchronized.

+
+
+
+
+
+
+

6.136. Header Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
uint32_t  totalBuffers
+
+
uint32_t  version
+
+
+
+

Variables

+
+
+ uint32_t Header::totalBuffers [inherited]
+
+
+

Total number of buffers present in the file.

+
+
+
+ uint32_t Header::version [inherited]
+
+
+

Version of file format.

+
+
+
+
+
+
+

6.137. NV::Cupti::Checkpoint::CUpti_Checkpoint Struct Reference

+

[CUPTI Checkpoint API] +

+
+

A CUpti_Checkpoint object should be initialized with desired options prior to passing into any CUPTI Checkpoint API function. + The first call into a Checkpoint API function will initialize internal state based on these options. Subsequent changes to + these options will not have any effect. +

+

Checkpoint data is saved in device, host, and filesystem space. There are options to reserve memory at each level (device, + host, filesystem) which are intended to allow a guarantee that a certain amount of memory will remain free for use after the + checkpoint is saved. Note, however, that falling back to slower levels of memory (host, and then filesystem) to save the checkpoint + will result in performance degradation. Currently, the filesystem limitation is not implemented. Note that falling back to + filesystem storage may significantly impact the performance for saving and restoring a checkpoint. +

+
+

Public Variables

+
+
uint8_t  allowOverwrite
+
[in] Boolean, Allow checkpoint to save over existing checkpoint
+
CUcontext  ctx
+
[in] Set to context to save from, or will use current context if NULL
+
uint8_t  optimizations
+
[in] Mask of CUpti_CheckpointOptimizations flags for this checkpoint
+
+ void + * pPriv
+
[in] Assign to NULL
+
size_t  reserveDeviceMB
+
[in] Restrict checkpoint from using last N MB of device memory (-1 = use no device memory)
+
size_t  reserveHostMB
+
[in] Restrict checkpoint from using last N MB of host memory (-1 = use no host memory)
+
size_t  structSize
+
[in] Must be set to CUpti_Checkpoint_STRUCT_SIZE
+
+
+

Variables

+
+
+ uint8_t NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::allowOverwrite [inherited]
+
+
+

[in] Boolean, Allow checkpoint to save over existing checkpoint

+
+
+
+ CUcontext NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::ctx [inherited]
+
+
+

[in] Set to context to save from, or will use current context if NULL

+
+
+
+ uint8_t NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::optimizations [inherited]
+
+
+

[in] Mask of CUpti_CheckpointOptimizations flags for this checkpoint

+
+
+
+ + + void + * NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::pPriv [inherited]
+
+
+

[in] Assign to NULL

+
+
+
+ size_t NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::reserveDeviceMB [inherited]
+
+
+

[in] Restrict checkpoint from using last N MB of device memory (-1 = use no device memory)

+
+
+
+ size_t NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::reserveHostMB [inherited]
+
+
+

[in] Restrict checkpoint from using last N MB of host memory (-1 = use no host memory)

+
+
+
+ size_t NV::​Cupti::​Checkpoint::​CUpti_Checkpoint::structSize [inherited]
+
+
+

[in] Must be set to CUpti_Checkpoint_STRUCT_SIZE

+
+
+
+
+
+
+

6.138. PcSamplingStallReasons Struct Reference

+

[CUPTI PC Sampling Utility API] +

+
+

+
+

Public Variables

+
+
size_t  numStallReasons
+
+
+ uint32_t + * stallReasonIndex
+
+
+ char + ** stallReasons
+
+
+
+

Variables

+
+
+ size_t PcSamplingStallReasons::numStallReasons [inherited]
+
+
+

Number of all available stall reasons

+
+
+
+ + + uint32_t + * PcSamplingStallReasons::stallReasonIndex [inherited]
+
+
+

Stall reason index of all available stall reasons

+
+
+
+ + + char + ** PcSamplingStallReasons::stallReasons [inherited]
+
+
+

Names of all available stall reasons

+
+
+
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/classes.html b/doc/Cupti/classes.html new file mode 100644 index 0000000000000000000000000000000000000000..18385467364316c245eb252aae9a4f1b6c08903a --- /dev/null +++ b/doc/Cupti/classes.html @@ -0,0 +1,36 @@ + + +Cupti: Alphabetical List + + + + + +
+

Data Structure Index

B | C | H | P

+ +
  B  
+
CUpti_ActivityInstructionCorrelation   CUpti_ActivityMemset2   CUpti_ActivitySynchronization   CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params   
BufferInfo   CUpti_ActivityInstructionExecution   CUpti_ActivityMemset3   CUpti_ActivityUnifiedMemoryCounter   CUpti_Profiler_CounterDataImage_CalculateSize_Params   
  C  
+
CUpti_ActivityJit   CUpti_ActivityMemset4   CUpti_ActivityUnifiedMemoryCounter2   CUpti_Profiler_CounterDataImage_Initialize_Params   
CUpti_Activity   CUpti_ActivityKernel   CUpti_ActivityMetric   CUpti_ActivityUnifiedMemoryCounterConfig   CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params   
CUpti_ActivityAPI   CUpti_ActivityKernel2   CUpti_ActivityMetricInstance   CUpti_CallbackData   CUpti_Profiler_CounterDataImageOptions   
CUpti_ActivityAutoBoostState   CUpti_ActivityKernel3   CUpti_ActivityModule   CUpti_Checkpoint (NV::Cupti::Checkpoint)   CUpti_Profiler_DeInitialize_Params   
CUpti_ActivityBranch   CUpti_ActivityKernel4   CUpti_ActivityName   CUpti_EventGroupSet   CUpti_Profiler_DeviceSupported_Params   
CUpti_ActivityBranch2   CUpti_ActivityKernel5   CUpti_ActivityNvLink   CUpti_EventGroupSets   CUpti_Profiler_DisableProfiling_Params   
CUpti_ActivityCdpKernel   CUpti_ActivityKernel6   CUpti_ActivityNvLink2   CUpti_GetCubinCrcParams   CUpti_Profiler_EnableProfiling_Params   
CUpti_ActivityContext   CUpti_ActivityKernel7   CUpti_ActivityNvLink3   CUpti_GetSassToSourceCorrelationParams   CUpti_Profiler_EndPass_Params   
CUpti_ActivityCudaEvent   CUpti_ActivityKernel8   CUpti_ActivityNvLink4   CUpti_GraphData   CUpti_Profiler_EndSession_Params   
CUpti_ActivityDevice   CUpti_ActivityMarker   CUpti_ActivityObjectKindId   CUpti_MetricValue   CUpti_Profiler_FlushCounterData_Params   
CUpti_ActivityDevice2   CUpti_ActivityMarker2   CUpti_ActivityOpenAcc   CUpti_ModuleResourceData   CUpti_Profiler_GetCounterAvailability_Params   
CUpti_ActivityDevice3   CUpti_ActivityMarkerData   CUpti_ActivityOpenAccData   CUpti_NvtxData   CUpti_Profiler_Initialize_Params   
CUpti_ActivityDevice4   CUpti_ActivityMemcpy   CUpti_ActivityOpenAccLaunch   CUpti_PCSamplingConfigurationInfo   CUpti_Profiler_IsPassCollected_Params   
CUpti_ActivityDeviceAttribute   CUpti_ActivityMemcpy3   CUpti_ActivityOpenAccOther   CUpti_PCSamplingConfigurationInfoParams   CUpti_Profiler_SetConfig_Params   
CUpti_ActivityEnvironment   CUpti_ActivityMemcpy4   CUpti_ActivityOpenMp   CUpti_PCSamplingData   CUpti_Profiler_UnsetConfig_Params   
CUpti_ActivityEvent   CUpti_ActivityMemcpy5   CUpti_ActivityOverhead   CUpti_PCSamplingDisableParams   CUpti_ResourceData   
CUpti_ActivityEventInstance   CUpti_ActivityMemcpyPtoP   CUpti_ActivityPcie   CUpti_PCSamplingEnableParams   CUpti_SynchronizeData   
CUpti_ActivityExternalCorrelation   CUpti_ActivityMemcpyPtoP2   CUpti_ActivityPCSampling   CUpti_PCSamplingGetDataParams   CUptiUtil_GetBufferInfoParams (CUPTI::PcSamplingUtil)   
CUpti_ActivityFunction   CUpti_ActivityMemcpyPtoP3   CUpti_ActivityPCSampling2   CUpti_PCSamplingGetNumStallReasonsParams   CUptiUtil_GetHeaderDataParams (CUPTI::PcSamplingUtil)   
CUpti_ActivityGlobalAccess   CUpti_ActivityMemcpyPtoP4   CUpti_ActivityPCSampling3   CUpti_PCSamplingGetStallReasonsParams   CUptiUtil_GetPcSampDataParams (CUPTI::PcSamplingUtil)   
CUpti_ActivityGlobalAccess2   CUpti_ActivityMemory   CUpti_ActivityPCSamplingConfig   CUpti_PCSamplingPCData   CUptiUtil_MergePcSampDataParams (CUPTI::PcSamplingUtil)   
CUpti_ActivityGlobalAccess3   CUpti_ActivityMemory2   CUpti_ActivityPCSamplingRecordInfo   CUpti_PCSamplingStallReason   CUptiUtil_PutPcSampDataParams (CUPTI::PcSamplingUtil)   
CUpti_ActivityGraphTrace   CUpti_ActivityMemory3   CUpti_ActivityPreemption   CUpti_PCSamplingStartParams   
  H  
+
CUpti_ActivityInstantaneousEvent   CUpti_ActivityMemory3::PACKED_ALIGNMENT   CUpti_ActivitySharedAccess   CUpti_PCSamplingStopParams   Header   
CUpti_ActivityInstantaneousEventInstance   CUpti_ActivityMemoryPool   CUpti_ActivitySourceLocator   CUpti_Profiler_BeginPass_Params   
  P  
+
CUpti_ActivityInstantaneousMetric   CUpti_ActivityMemoryPool2   CUpti_ActivityStream   CUpti_Profiler_BeginSession_Params   PcSamplingStallReasons   
CUpti_ActivityInstantaneousMetricInstance   CUpti_ActivityMemset   

B | C | H | P

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/doxygen.css b/doc/Cupti/doxygen.css new file mode 100644 index 0000000000000000000000000000000000000000..3767dc957b0dd2268ea831deb86dae6b7a7222a0 --- /dev/null +++ b/doc/Cupti/doxygen.css @@ -0,0 +1,441 @@ +body, table, div, p, dl { + font-family: Lucida Grande, Verdana, Geneva, Arial, sans-serif; + font-size: 12px; +} + +/* @group Heading Levels */ + +h1 { + text-align: center; + font-size: 150%; +} + +h2 { + font-size: 120%; +} + +h3 { + font-size: 100%; +} + +/* @end */ + +caption { + font-weight: bold; +} + +div.qindex, div.navtab{ + background-color: #e8eef2; + border: 1px solid #84b0c7; + text-align: center; + margin: 2px; + padding: 2px; +} + +div.qindex, div.navpath { + width: 100%; + line-height: 140%; +} + +div.navtab { + margin-right: 15px; +} + +/* @group Link Styling */ + +a { + color: #153788; + font-weight: normal; + text-decoration: none; +} + +.contents a:visited { + color: #1b77c5; +} + +a:hover { + text-decoration: underline; +} + +a.qindex { + font-weight: bold; +} + +a.qindexHL { + font-weight: bold; + background-color: #6666cc; + color: #ffffff; + border: 1px double #9295C2; +} + +.contents a.qindexHL:visited { + color: #ffffff; +} + +a.el { + font-weight: bold; +} + +a.elRef { +} + +a.code { +} + +a.codeRef { +} + +/* @end */ + +dl.el { + margin-left: -1cm; +} + +.fragment { + font-family: monospace, fixed; + font-size: 105%; +} + +pre.fragment { + border: 1px solid #CCCCCC; + background-color: #f5f5f5; + padding: 4px 6px; + margin: 4px 8px 4px 2px; +} + +div.ah { + background-color: black; + font-weight: bold; + color: #ffffff; + margin-bottom: 3px; + margin-top: 3px +} + +div.groupHeader { + margin-left: 16px; + margin-top: 12px; + margin-bottom: 6px; + font-weight: bold; +} + +div.groupText { + margin-left: 16px; + font-style: italic; +} + +body { + background: white; + color: black; + margin-right: 20px; + margin-left: 20px; +} + +td.indexkey { + background-color: #e8eef2; + font-weight: bold; + border: 1px 
solid #CCCCCC; + margin: 2px 0px 2px 0; + padding: 2px 10px; +} + +td.indexvalue { + background-color: #e8eef2; + border: 1px solid #CCCCCC; + padding: 2px 10px; + margin: 2px 0px; +} + +tr.memlist { + background-color: #f0f0f0; +} + +p.formulaDsp { + text-align: center; +} + +img.formulaDsp { + +} + +img.formulaInl { + vertical-align: middle; +} + +/* @group Code Colorization */ + +span.keyword { + color: #008000 +} + +span.keywordtype { + color: #604020 +} + +span.keywordflow { + color: #e08000 +} + +span.comment { + color: #800000 +} + +span.preprocessor { + color: #806020 +} + +span.stringliteral { + color: #002080 +} + +span.charliteral { + color: #008080 +} + +span.vhdldigit { + color: #ff00ff +} + +span.vhdlchar { + color: #000000 +} + +span.vhdlkeyword { + color: #700070 +} + +span.vhdllogic { + color: #ff0000 +} + +/* @end */ + +.search { + color: #003399; + font-weight: bold; +} + +form.search { + margin-bottom: 0px; + margin-top: 0px; +} + +input.search { + font-size: 75%; + color: #000080; + font-weight: normal; + background-color: #e8eef2; +} + +td.tiny { + font-size: 75%; +} + +.dirtab { + padding: 4px; + border-collapse: collapse; + border: 1px solid #84b0c7; +} + +th.dirtab { + background: #e8eef2; + font-weight: bold; +} + +hr { + height: 0; + border: none; + border-top: 1px solid #666; +} + +/* @group Member Descriptions */ + +.mdescLeft, .mdescRight, +.memItemLeft, .memItemRight, +.memTemplItemLeft, .memTemplItemRight, .memTemplParams { + background-color: #FAFAFA; + border: none; + margin: 4px; + padding: 1px 0 0 8px; +} + +.mdescLeft, .mdescRight { + padding: 0px 8px 4px 8px; + color: #555; +} + +.memItemLeft, .memItemRight, .memTemplParams { + border-top: 1px solid #ccc; +} + +.memTemplParams { + color: #606060; +} + +/* @end */ + +/* @group Member Details */ + +/* Styles for detailed member documentation */ + +.memtemplate { + font-size: 80%; + color: #606060; + font-weight: normal; + margin-left: 3px; +} + +.memnav { + background-color: 
#e8eef2; + border: 1px solid #84b0c7; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} + +.memitem { + padding: 0; +} + +.memname { + white-space: nowrap; + font-weight: bold; +} + +.memproto, .memdoc { + border: 1px solid #84b0c7; +} + +.memproto { + padding: 0; + background-color: #d5e1e8; + font-weight: bold; + -webkit-border-top-left-radius: 8px; + -webkit-border-top-right-radius: 8px; + -moz-border-radius-topleft: 8px; + -moz-border-radius-topright: 8px; +} + +.memdoc { + padding: 2px 5px; + background-color: #eef3f5; + border-top-width: 0; + -webkit-border-bottom-left-radius: 8px; + -webkit-border-bottom-right-radius: 8px; + -moz-border-radius-bottomleft: 8px; + -moz-border-radius-bottomright: 8px; +} + +.paramkey { + text-align: right; +} + +.paramtype { + white-space: nowrap; +} + +.paramname { + color: #602020; + white-space: nowrap; +} +.paramname em { + font-style: normal; +} + +/* @end */ + +/* @group Directory (tree) */ + +/* for the tree view */ + +.ftvtree { + font-family: sans-serif; + margin: 0.5em; +} + +/* these are for tree view when used as main index */ + +.directory { + font-size: 9pt; + font-weight: bold; +} + +.directory h3 { + margin: 0px; + margin-top: 1em; + font-size: 11pt; +} + +/* +The following two styles can be used to replace the root node title +with an image of your choice. Simply uncomment the next two styles, +specify the name of your image and be sure to set 'height' to the +proper pixel height of your image. 
+*/ + +/* +.directory h3.swap { + height: 61px; + background-repeat: no-repeat; + background-image: url("yourimage.gif"); +} +.directory h3.swap span { + display: none; +} +*/ + +.directory > h3 { + margin-top: 0; +} + +.directory p { + margin: 0px; + white-space: nowrap; +} + +.directory div { + display: none; + margin: 0px; +} + +.directory img { + vertical-align: -30%; +} + +/* these are for tree view when not used as main index */ + +.directory-alt { + font-size: 100%; + font-weight: bold; +} + +.directory-alt h3 { + margin: 0px; + margin-top: 1em; + font-size: 11pt; +} + +.directory-alt > h3 { + margin-top: 0; +} + +.directory-alt p { + margin: 0px; + white-space: nowrap; +} + +.directory-alt div { + display: none; + margin: 0px; +} + +.directory-alt img { + vertical-align: -30%; +} + +/* @end */ + +address { + font-style: normal; + color: #333; +} diff --git a/doc/Cupti/doxygen.png b/doc/Cupti/doxygen.png new file mode 100644 index 0000000000000000000000000000000000000000..f0a274bbaffdd67f6d784c894d9cf28729db0e14 Binary files /dev/null and b/doc/Cupti/doxygen.png differ diff --git a/doc/Cupti/ftv2blank.png b/doc/Cupti/ftv2blank.png new file mode 100644 index 0000000000000000000000000000000000000000..493c3c0b615ade5b22027bde773faf2c0e076d66 Binary files /dev/null and b/doc/Cupti/ftv2blank.png differ diff --git a/doc/Cupti/ftv2doc.png b/doc/Cupti/ftv2doc.png new file mode 100644 index 0000000000000000000000000000000000000000..f72999f92172cca6edaa2538286b3e369bec9f49 Binary files /dev/null and b/doc/Cupti/ftv2doc.png differ diff --git a/doc/Cupti/ftv2folderclosed.png b/doc/Cupti/ftv2folderclosed.png new file mode 100644 index 0000000000000000000000000000000000000000..d6d063440cbf13c4128dacd96661b6fce58abf26 Binary files /dev/null and b/doc/Cupti/ftv2folderclosed.png differ diff --git a/doc/Cupti/ftv2folderopen.png b/doc/Cupti/ftv2folderopen.png new file mode 100644 index 0000000000000000000000000000000000000000..bbe2c913cf493ee37ad8e3a5132382138d93ac92 Binary 
files /dev/null and b/doc/Cupti/ftv2folderopen.png differ diff --git a/doc/Cupti/ftv2lastnode.png b/doc/Cupti/ftv2lastnode.png new file mode 100644 index 0000000000000000000000000000000000000000..e7b9ba90cb0cf71c8ce662956bfee7d64cf60fa6 Binary files /dev/null and b/doc/Cupti/ftv2lastnode.png differ diff --git a/doc/Cupti/ftv2link.png b/doc/Cupti/ftv2link.png new file mode 100644 index 0000000000000000000000000000000000000000..14f3fed003659b11214ac7a1ca0efa2b9145ce9e Binary files /dev/null and b/doc/Cupti/ftv2link.png differ diff --git a/doc/Cupti/ftv2mlastnode.png b/doc/Cupti/ftv2mlastnode.png new file mode 100644 index 0000000000000000000000000000000000000000..09ceb6adb01054ce799ad20c0e818ab9272f2df2 Binary files /dev/null and b/doc/Cupti/ftv2mlastnode.png differ diff --git a/doc/Cupti/ftv2mnode.png b/doc/Cupti/ftv2mnode.png new file mode 100644 index 0000000000000000000000000000000000000000..3254c05112199fbc80aad313611c58a5b388792d Binary files /dev/null and b/doc/Cupti/ftv2mnode.png differ diff --git a/doc/Cupti/ftv2node.png b/doc/Cupti/ftv2node.png new file mode 100644 index 0000000000000000000000000000000000000000..c9f06a57f4cfe0f9851cc1aacd7245f741b53ad1 Binary files /dev/null and b/doc/Cupti/ftv2node.png differ diff --git a/doc/Cupti/ftv2plastnode.png b/doc/Cupti/ftv2plastnode.png new file mode 100644 index 0000000000000000000000000000000000000000..0b07e00913d8069ebbb51bd7fd6d70d8bba88f75 Binary files /dev/null and b/doc/Cupti/ftv2plastnode.png differ diff --git a/doc/Cupti/ftv2pnode.png b/doc/Cupti/ftv2pnode.png new file mode 100644 index 0000000000000000000000000000000000000000..2001b797ba2b98a4127f1d3efca64aef08bf6d51 Binary files /dev/null and b/doc/Cupti/ftv2pnode.png differ diff --git a/doc/Cupti/ftv2vertline.png b/doc/Cupti/ftv2vertline.png new file mode 100644 index 0000000000000000000000000000000000000000..b330f3a33c0085c183ff39fc56b1b274160c1da0 Binary files /dev/null and b/doc/Cupti/ftv2vertline.png differ diff --git a/doc/Cupti/functions.html 
b/doc/Cupti/functions.html new file mode 100644 index 0000000000000000000000000000000000000000..7d61ca2fa9acd07f5dc593e3142ed9a298e45fdf --- /dev/null +++ b/doc/Cupti/functions.html @@ -0,0 +1,2750 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

7. Data Fields

+
+
+

Here is a list of all documented struct and union fields with links to the struct/union documentation for each field:

+
+ +
+

B

+
+
bandwidth
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink2
+
CUpti_ActivityNvLink4
+
CUpti_ActivityNvLink3
+
bDumpCounterDataInFile
+
CUpti_Profiler_BeginSession_Params
+
blockX
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel2
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityPreemption
+
blockY
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityPreemption
+
blockZ
+
CUpti_ActivityPreemption
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
bridgeId
+
CUpti_ActivityPcie
+
bufferByteSize
+
BufferInfo
+
bufferInfoData
+
CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams
+
bufferType
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
bytes
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityMemory3
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemory
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy5
+
+
+
+

C

+
+
cacheConfig
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel6
+
cacheConfigExecuted
+
CUpti_ActivityKernel
+
cacheConfigRequested
+
CUpti_ActivityKernel
+
cachePath
+
CUpti_ActivityJit
+
cacheSize
+
CUpti_ActivityJit
+
callbackSite
+
CUpti_CallbackData
+
category
+
CUpti_ActivityMarkerData
+
cbid
+
CUpti_ActivityAPI
+
channelID
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset4
+
channelType
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset4
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
clocksThrottleReasons
+
CUpti_ActivityEnvironment
+
clusterSchedulingPolicy
+
CUpti_ActivityKernel8
+
clusterX
+
CUpti_ActivityKernel8
+
clusterY
+
CUpti_ActivityKernel8
+
clusterZ
+
CUpti_ActivityKernel8
+
cmp
+
CUpti_Profiler_DeviceSupported_Params
+
collectionModeData
+
CUpti_PCSamplingConfigurationInfo
+
collectNumPcs
+
CUpti_PCSamplingData
+
color
+
CUpti_ActivityMarkerData
+
completed
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
computeApiKind
+
CUpti_ActivityContext
+
computeCapabilityMajor
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
computeCapabilityMinor
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
computeInstanceId
+
CUpti_ActivityDevice4
+
confidentialCompute
+
CUpti_Profiler_DeviceSupported_Params
+
configSize
+
CUpti_Profiler_SetConfig_Params
+
constantMemorySize
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
context
+
CUpti_CallbackData
+
CUpti_ResourceData
+
CUpti_SynchronizeData
+
contextId
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemory
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityContext
+
CUpti_ActivityFunction
+
CUpti_ActivityModule
+
CUpti_ActivityCudaEvent
+
CUpti_ActivityStream
+
CUpti_ActivitySynchronization
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP
+
contextUid
+
CUpti_CallbackData
+
cooling
+
CUpti_ActivityEnvironment
+
copyKind
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
coreClockRate
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
correlationData
+
CUpti_CallbackData
+
correlationId
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityAPI
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityEvent
+
CUpti_ActivityEventInstance
+
CUpti_ActivityMetric
+
CUpti_ActivityMetricInstance
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityGlobalAccess3
+
CUpti_ActivityBranch
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityPCSampling
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityStream
+
CUpti_ActivityPCSamplingRecordInfo
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityCudaEvent
+
CUpti_ActivitySynchronization
+
CUpti_ActivityExternalCorrelation
+
CUpti_ActivityJit
+
CUpti_ActivityGraphTrace
+
CUpti_CallbackData
+
CUpti_ActivityBranch2
+
CUpti_ActivityPCSampling3
+
CUpti_ActivityKernel7
+
CUpti_ActivityGlobalAccess
+
CUpti_ActivityKernel3
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
counterAvailabilityImageSize
+
CUpti_Profiler_GetCounterAvailability_Params
+
counterDataImageSize
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
CUpti_Profiler_CounterDataImage_CalculateSize_Params
+
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
CUpti_Profiler_BeginSession_Params
+
counterDataPrefixSize
+
CUpti_Profiler_CounterDataImageOptions
+
counterDataScratchBufferSize
+
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
CUpti_Profiler_BeginSession_Params
+
counterKind
+
CUpti_ActivityUnifiedMemoryCounter
+
CUpti_ActivityUnifiedMemoryCounter2
+
ctx
+
CUpti_Profiler_SetConfig_Params
+
CUpti_Profiler_BeginPass_Params
+
CUpti_PCSamplingStopParams
+
CUpti_Profiler_GetCounterAvailability_Params
+
CUpti_Profiler_UnsetConfig_Params
+
CUpti_PCSamplingEnableParams
+
CUpti_PCSamplingGetStallReasonsParams
+
CUpti_Profiler_FlushCounterData_Params
+
NV::Cupti::Checkpoint::CUpti_Checkpoint
+
CUpti_PCSamplingConfigurationInfoParams
+
CUpti_PCSamplingDisableParams
+
CUpti_PCSamplingStartParams
+
CUpti_Profiler_EndPass_Params
+
CUpti_Profiler_BeginSession_Params
+
CUpti_Profiler_EnableProfiling_Params
+
CUpti_Profiler_DisableProfiling_Params
+
CUpti_Profiler_EndSession_Params
+
CUpti_Profiler_IsPassCollected_Params
+
CUpti_PCSamplingGetNumStallReasonsParams
+
CUpti_PCSamplingGetDataParams
+
cubin
+
CUpti_GetSassToSourceCorrelationParams
+
CUpti_GetCubinCrcParams
+
CUpti_ActivityModule
+
cubinCrc
+
CUpti_PCSamplingPCData
+
CUpti_GetCubinCrcParams
+
cubinSize
+
CUpti_ModuleResourceData
+
CUpti_GetCubinCrcParams
+
CUpti_ActivityModule
+
CUpti_GetSassToSourceCorrelationParams
+
cuContextId
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAcc
+
cudaEventId
+
CUpti_ActivitySynchronization
+
cuDevice
+
CUpti_Profiler_DeviceSupported_Params
+
cuDeviceId
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAcc
+
cuProcessId
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenMp
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAcc
+
cuStreamId
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenAccLaunch
+
cuThreadId
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenMp
+
+
+
+

D

+
+
dcs
+
CUpti_ActivityObjectKindId
+
dependency
+
CUpti_GraphData
+
deviceId
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemory
+
CUpti_ActivityEnvironment
+
CUpti_ActivityUnifiedMemoryCounter
+
CUpti_ActivityMemory2
+
CUpti_ActivityPcie
+
CUpti_ActivityInstantaneousEvent
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemory3
+
CUpti_ActivityInstantaneousEventInstance
+
CUpti_ActivityInstantaneousMetric
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityInstantaneousMetricInstance
+
CUpti_ActivityJit
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityKernel
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityUnifiedMemoryCounterConfig
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemset
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityMemset2
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemset3
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityMemset4
+
CUpti_ActivityDeviceAttribute
+
CUpti_ActivityContext
+
devicePtr
+
CUpti_ActivityOpenAccData
+
devId
+
CUpti_ActivityPcie
+
dirName
+
CUpti_GetSassToSourceCorrelationParams
+
diverged
+
CUpti_ActivityBranch
+
CUpti_ActivityBranch2
+
domain
+
CUpti_ActivityEvent
+
CUpti_ActivityMarker2
+
CUpti_ActivityPcie
+
CUpti_ActivityEventInstance
+
domainId
+
CUpti_ActivityNvLink2
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
droppedSamples
+
CUpti_ActivityPCSamplingRecordInfo
+
CUpti_PCSamplingData
+
dstContextId
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP
+
dstDeviceId
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
dstId
+
CUpti_ActivityUnifiedMemoryCounter2
+
dstKind
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP
+
dynamicSharedMemory
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel6
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel4
+
+
+
+

E

+
+
eccEnabled
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
enable
+
CUpti_ActivityUnifiedMemoryCounterConfig
+
enabled
+
CUpti_ActivityAutoBoostState
+
enableStartStopControlData
+
CUpti_PCSamplingConfigurationInfo
+
end
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemset2
+
CUpti_ActivitySynchronization
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityMemset3
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemset4
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenMp
+
CUpti_ActivityMemory
+
CUpti_ActivityJit
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityAPI
+
CUpti_ActivityMemset
+
CUpti_ActivityOverhead
+
CUpti_ActivityUnifiedMemoryCounter2
+
environmentKind
+
CUpti_ActivityEnvironment
+
eventGroups
+
CUpti_EventGroupSet
+
eventId
+
CUpti_ActivityCudaEvent
+
eventKind
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenMp
+
executed
+
CUpti_ActivityKernel5
+
CUpti_ActivityBranch
+
CUpti_ActivityBranch2
+
CUpti_ActivityKernel6
+
CUpti_ActivityGlobalAccess
+
CUpti_ActivityKernel7
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel4
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityKernel8
+
CUpti_ActivityGlobalAccess3
+
externalId
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityExternalCorrelation
+
externalKind
+
CUpti_ActivityExternalCorrelation
+
+
+
+

F

+
+
fanSpeed
+
CUpti_ActivityEnvironment
+
fileHandler
+
CUPTI::PcSamplingUtil::CUptiUtil_GetHeaderDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams
+
fileName
+
CUpti_ActivitySourceLocator
+
CUpti_GetSassToSourceCorrelationParams
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
flag
+
CUpti_ActivityStream
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink2
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
flags
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityDeviceAttribute
+
CUpti_ActivityMarker
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMarker2
+
CUpti_ActivityMarkerData
+
CUpti_ActivityInstantaneousMetricInstance
+
CUpti_ActivityInstantaneousMetric
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityUnifiedMemoryCounter2
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityPCSampling
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityPCSampling3
+
CUpti_ActivityMemset
+
CUpti_ActivityMemset2
+
CUpti_ActivityInstructionCorrelation
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset4
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice
+
CUpti_ActivityGlobalAccess3
+
CUpti_ActivityMetric
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMetricInstance
+
CUpti_ActivityGlobalAccess
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityMemcpy4
+
freePC
+
CUpti_ActivityMemory
+
functionId
+
CUpti_ActivityPCSampling
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityPCSampling3
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityGlobalAccess3
+
CUpti_ActivityInstructionCorrelation
+
CUpti_ActivityBranch2
+
functionIndex
+
CUpti_ActivityFunction
+
CUpti_PCSamplingPCData
+
functionName
+
CUpti_GetSassToSourceCorrelationParams
+
CUpti_CallbackData
+
CUpti_NvtxData
+
CUpti_PCSamplingPCData
+
functionParams
+
CUpti_NvtxData
+
CUpti_CallbackData
+
functionReturnValue
+
CUpti_NvtxData
+
CUpti_CallbackData
+
+
+
+

G

+
+
globalMemoryBandwidth
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice3
+
globalMemorySize
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
gpuInstanceId
+
CUpti_ActivityDevice4
+
gpuTemperature
+
CUpti_ActivityEnvironment
+
graph
+
CUpti_GraphData
+
graphExec
+
CUpti_GraphData
+
graphId
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP3
+
graphNodeId
+
CUpti_ActivityMemset4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemset3
+
gridId
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityPreemption
+
CUpti_ActivityKernel8
+
gridX
+
CUpti_ActivityKernel4
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
gridY
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel2
+
gridZ
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel5
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel8
+
+
+ + +
+

J

+
+
jitEntryType
+
CUpti_ActivityJit
+
jitOperationCorrelationId
+
CUpti_ActivityJit
+
jitOperationType
+
CUpti_ActivityJit
+
+
+
+

K

+
+
kind
+
CUpti_ActivityUnifiedMemoryCounterConfig
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityJit
+
CUpti_ActivityInstantaneousMetricInstance
+
CUpti_ActivityInstantaneousMetric
+
CUpti_ActivityInstantaneousEventInstance
+
CUpti_ActivityInstantaneousEvent
+
CUpti_ActivityPcie
+
CUpti_ActivityNvLink4
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink2
+
CUpti_ActivityNvLink
+
CUpti_ActivityExternalCorrelation
+
CUpti_ActivityOpenMp
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityInstructionCorrelation
+
CUpti_ActivitySynchronization
+
CUpti_ActivityStream
+
CUpti_ActivityCudaEvent
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityModule
+
CUpti_ActivityFunction
+
CUpti_ActivityUnifiedMemoryCounter2
+
CUpti_ActivityUnifiedMemoryCounter
+
CUpti_ActivityPCSamplingRecordInfo
+
CUpti_ActivityPCSampling3
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityPCSampling
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityEnvironment
+
CUpti_ActivityOverhead
+
CUpti_ActivityMarkerData
+
CUpti_ActivityMarker2
+
CUpti_ActivityMarker
+
CUpti_ActivityName
+
CUpti_ActivityContext
+
CUpti_ActivityDeviceAttribute
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice
+
CUpti_ActivityBranch2
+
CUpti_ActivityBranch
+
CUpti_ActivityGlobalAccess3
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityGlobalAccess
+
CUpti_ActivitySourceLocator
+
CUpti_ActivityMetricInstance
+
CUpti_ActivityMetric
+
CUpti_ActivityEventInstance
+
CUpti_ActivityEvent
+
CUpti_ActivityAPI
+
CUpti_ActivityPreemption
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityMemory3
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemset
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy
+
CUpti_Activity
+
+
+ +
+

M

+
+
maxBlockDimX
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice3
+
maxBlockDimY
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
maxBlockDimZ
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxBlocksPerMultiprocessor
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
maxGridDimX
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxGridDimY
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxGridDimZ
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxIPC
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
maxLaunchesPerPass
+
CUpti_Profiler_BeginSession_Params
+
maxNumRanges
+
CUpti_Profiler_CounterDataImageOptions
+
maxNumRangeTreeNodes
+
CUpti_Profiler_CounterDataImageOptions
+
maxRangeNameLength
+
CUpti_Profiler_CounterDataImageOptions
+
maxRangesPerPass
+
CUpti_Profiler_BeginSession_Params
+
maxRegistersPerBlock
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxRegistersPerMultiprocessor
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxSharedMemoryPerBlock
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice4
+
maxSharedMemoryPerMultiprocessor
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxThreadsPerBlock
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
maxWarpsPerMultiprocessor
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice3
+
memoryClock
+
CUpti_ActivityEnvironment
+
memoryKind
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemory
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemset
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemory3
+
CUpti_ActivityMemset3
+
memoryOperationType
+
CUpti_ActivityMemory3
+
CUpti_ActivityMemory2
+
memoryPoolConfig
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3
+
memoryPoolOperationType
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityMemoryPool
+
memoryPoolType
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityMemory2
+
MergedPcSampDataBuffers
+
CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams
+
migUuid
+
CUpti_ActivityDevice4
+
minBytesToKeep
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityMemoryPool2
+
minNestingLevel
+
CUpti_Profiler_SetConfig_Params
+
moduleId
+
CUpti_ActivityFunction
+
CUpti_ModuleResourceData
+
+
+
+

N

+
+
name
+
CUpti_ActivityMemory
+
CUpti_ActivityMemory2
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel5
+
CUpti_ActivityDevice4
+
CUpti_ActivityName
+
CUpti_ActivityKernel6
+
CUpti_ActivityMarker
+
CUpti_ActivityMarker2
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel7
+
CUpti_ActivityFunction
+
CUpti_ActivityKernel8
+
CUpti_ActivityMemory3
+
CUpti_ActivityKernel3
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityDevice
+
CUpti_ActivityKernel4
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
node
+
CUpti_GraphData
+
nodeType
+
CUpti_GraphData
+
nonUsrKernelsTotalSamples
+
CUpti_PCSamplingData
+
notPredOffThreadsExecuted
+
CUpti_ActivityInstructionExecution
+
nullStreamId
+
CUpti_ActivityContext
+
numAttributes
+
CUpti_PCSamplingConfigurationInfoParams
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
numberOfBuffers
+
CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams
+
numEventGroups
+
CUpti_EventGroupSet
+
numGangs
+
CUpti_ActivityOpenAccLaunch
+
numMemcpyEngines
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
numMergedBuffer
+
CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams
+
numMultiprocessors
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice2
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice3
+
numNestingLevels
+
CUpti_Profiler_SetConfig_Params
+
numRangesDropped
+
CUpti_Profiler_IsPassCollected_Params
+
CUpti_Profiler_FlushCounterData_Params
+
numSelectedStallReasons
+
BufferInfo
+
numSets
+
CUpti_EventGroupSets
+
numStallReasons
+
CUpti_PCSamplingGetNumStallReasonsParams
+
BufferInfo
+
PcSamplingStallReasons
+
CUpti_PCSamplingGetStallReasonsParams
+
numThreadsPerWarp
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice
+
CUpti_ActivityDevice4
+
CUpti_ActivityDevice2
+
numTraceBytesDropped
+
CUpti_Profiler_FlushCounterData_Params
+
CUpti_Profiler_IsPassCollected_Params
+
numWorkers
+
CUpti_ActivityOpenAccLaunch
+
nvlinkVersion
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
CUpti_ActivityNvLink2
+
nvswitchConnected
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
+
+ +
+

P

+
+
pAccessPolicyWindow
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
pad
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityBranch2
+
CUpti_ActivityInstantaneousMetricInstance
+
CUpti_PCSamplingPCData
+
CUpti_ActivityDevice2
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityDevice3
+
CUpti_ActivityDevice4
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityKernel
+
CUpti_ActivityMarker2
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityPreemption
+
CUpti_ActivityUnifiedMemoryCounter
+
CUpti_ActivityUnifiedMemoryCounter2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityEventInstance
+
CUpti_ActivityModule
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityMetric
+
CUpti_ActivityCudaEvent
+
CUpti_ActivityInstructionCorrelation
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMetricInstance
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityInstantaneousEventInstance
+
CUpti_ActivityInstantaneousMetric
+
pad0
+
CUpti_ActivityPcie
+
pad1
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAccLaunch
+
pad2
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemset4
+
padding
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemset3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityJit
+
parentBlockX
+
CUpti_ActivityCdpKernel
+
parentBlockY
+
CUpti_ActivityCdpKernel
+
parentBlockZ
+
CUpti_ActivityCdpKernel
+
parentConstruct
+
CUpti_ActivityOpenAcc
+
parentGridId
+
CUpti_ActivityCdpKernel
+
partitionedGlobalCacheExecuted
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
partitionedGlobalCacheRequested
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
passIndex
+
CUpti_Profiler_SetConfig_Params
+
CUpti_Profiler_EndPass_Params
+
payload
+
CUpti_ActivityMarkerData
+
payloadKind
+
CUpti_ActivityMarkerData
+
pBufferInfoData
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
PC
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3
+
pcieGeneration
+
CUpti_ActivityPcie
+
pcieLinkGen
+
CUpti_ActivityEnvironment
+
pcieLinkWidth
+
CUpti_ActivityEnvironment
+
pcOffset
+
CUpti_ActivityGlobalAccess
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityGlobalAccess3
+
CUpti_ActivityBranch
+
CUpti_ActivityBranch2
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityPCSampling
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityPCSampling3
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityInstructionCorrelation
+
CUpti_PCSamplingPCData
+
CUpti_GetSassToSourceCorrelationParams
+
pConfig
+
CUpti_Profiler_SetConfig_Params
+
pCounterAvailabilityImage
+
CUpti_Profiler_GetCounterAvailability_Params
+
pCounterDataFilePath
+
CUpti_Profiler_BeginSession_Params
+
pCounterDataImage
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
CUpti_Profiler_BeginSession_Params
+
pCounterDataPrefix
+
CUpti_Profiler_CounterDataImageOptions
+
pCounterDataScratchBuffer
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
CUpti_Profiler_BeginSession_Params
+
PcSampDataBuffer
+
CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams
+
pcSamplingData
+
CUpti_PCSamplingGetDataParams
+
pcSamplingStallReasonIndex
+
CUpti_PCSamplingStallReason
+
pCubin
+
CUpti_ModuleResourceData
+
peerDev
+
CUpti_ActivityPcie
+
physicalNvLinkCount
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink2
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
pid
+
CUpti_ActivityAutoBoostState
+
pOptions
+
CUpti_Profiler_CounterDataImage_CalculateSize_Params
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
portDev0
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink3
+
CUpti_ActivityNvLink4
+
CUpti_ActivityNvLink2
+
portDev1
+
CUpti_ActivityNvLink4
+
CUpti_ActivityNvLink
+
CUpti_ActivityNvLink2
+
CUpti_ActivityNvLink3
+
power
+
CUpti_ActivityEnvironment
+
powerLimit
+
CUpti_ActivityEnvironment
+
pPcData
+
CUpti_PCSamplingData
+
pPCSamplingConfigurationInfo
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
CUpti_PCSamplingConfigurationInfoParams
+
pPcSamplingStallReasons
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
pPriv
+
CUpti_Profiler_SetConfig_Params
+
CUpti_Profiler_DeviceSupported_Params
+
CUpti_Profiler_DeInitialize_Params
+
CUpti_Profiler_DisableProfiling_Params
+
CUpti_Profiler_EndSession_Params
+
CUpti_PCSamplingConfigurationInfoParams
+
CUpti_PCSamplingStopParams
+
CUpti_Profiler_EnableProfiling_Params
+
CUpti_Profiler_BeginSession_Params
+
CUpti_Profiler_GetCounterAvailability_Params
+
CUpti_Profiler_Initialize_Params
+
CUpti_PCSamplingDisableParams
+
CUpti_PCSamplingGetNumStallReasonsParams
+
CUpti_Profiler_EndPass_Params
+
CUpti_PCSamplingGetStallReasonsParams
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
CUpti_Profiler_FlushCounterData_Params
+
CUpti_Profiler_CounterDataImageOptions
+
CUpti_Profiler_IsPassCollected_Params
+
CUpti_PCSamplingGetDataParams
+
CUpti_PCSamplingStartParams
+
CUpti_Profiler_UnsetConfig_Params
+
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
+
CUpti_PCSamplingEnableParams
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
NV::Cupti::Checkpoint::CUpti_Checkpoint
+
CUpti_Profiler_BeginPass_Params
+
CUpti_Profiler_CounterDataImage_CalculateSize_Params
+
preemptionKind
+
CUpti_ActivityPreemption
+
priority
+
CUpti_ActivityStream
+
processId
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityAPI
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityUnifiedMemoryCounter
+
CUpti_ActivityUnifiedMemoryCounter2
+
CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory
+
pSamplingData
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
pt
+
CUpti_ActivityObjectKindId
+
+
+ +
+

R

+
+
range
+
CUpti_Profiler_BeginSession_Params
+
rangeId
+
CUpti_PCSamplingData
+
recordCount
+
BufferInfo
+
registersPerThread
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel3
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel4
+
releaseThreshold
+
CUpti_ActivityMemoryPool2
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT
+
CUpti_ActivityMemoryPool
+
remainingNumPcs
+
CUpti_PCSamplingData
+
replayMode
+
CUpti_Profiler_BeginSession_Params
+
requested
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
reserved
+
CUpti_ActivityExternalCorrelation
+
CUpti_ActivityInstantaneousEvent
+
CUpti_ActivityGraphTrace
+
reserved0
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityKernel2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel
+
CUpti_ActivityMemset
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel7
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityKernel8
+
reserveDeviceMB
+
NV::Cupti::Checkpoint::CUpti_Checkpoint
+
reserveHostMB
+
NV::Cupti::Checkpoint::CUpti_Checkpoint
+
resourceDescriptor
+
CUpti_ResourceData
+
returnValue
+
CUpti_ActivityAPI
+
runtimeCorrelationId
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityKernel
+
CUpti_ActivityMemcpy
+
+
+
+

S

+
+
samples
+
CUpti_ActivityPCSampling
+
CUpti_ActivityPCSampling2
+
CUpti_PCSamplingStallReason
+
CUpti_ActivityPCSampling3
+
samplingDataBufferData
+
CUpti_PCSamplingConfigurationInfo
+
samplingPeriod
+
CUpti_ActivityPCSamplingConfig
+
samplingPeriod2
+
CUpti_ActivityPCSamplingConfig
+
samplingPeriodData
+
CUpti_PCSamplingConfigurationInfo
+
samplingPeriodInCycles
+
CUpti_ActivityPCSamplingRecordInfo
+
scope
+
CUpti_ActivityUnifiedMemoryCounterConfig
+
CUpti_ActivityUnifiedMemoryCounter
+
scratchBufferSizeData
+
CUpti_PCSamplingConfigurationInfo
+
secondaryBus
+
CUpti_ActivityPcie
+
sets
+
CUpti_EventGroupSets
+
sharedMemoryCarveoutRequested
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel4
+
sharedMemoryConfig
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
sharedMemoryExecuted
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
sharedTransactions
+
CUpti_ActivitySharedAccess
+
shmemLimitConfig
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityKernel8
+
size
+
CUpti_ActivityPCSamplingConfig
+
CUpti_ActivityMemory2
+
CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT
+
CUpti_ActivityMemoryPool
+
CUpti_ActivityMemoryPool2
+
CUpti_PCSamplingPCData
+
CUpti_PCSamplingData
+
CUpti_PCSamplingConfigurationInfoParams
+
CUpti_PCSamplingGetDataParams
+
CUpti_PCSamplingEnableParams
+
CUpti_PCSamplingDisableParams
+
CUpti_PCSamplingStartParams
+
CUpti_PCSamplingStopParams
+
CUpti_PCSamplingGetNumStallReasonsParams
+
CUpti_PCSamplingGetStallReasonsParams
+
CUpti_GetSassToSourceCorrelationParams
+
CUpti_GetCubinCrcParams
+
CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetHeaderDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams
+
CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
+
CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams
+
sizeofCounterDataImageOptions
+
CUpti_Profiler_CounterDataImage_CalculateSize_Params
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
sli
+
CUpti_Profiler_DeviceSupported_Params
+
smClock
+
CUpti_ActivityEnvironment
+
sourceLocatorId
+
CUpti_ActivityGlobalAccess
+
CUpti_ActivityGlobalAccess2
+
CUpti_ActivityGlobalAccess3
+
CUpti_ActivityBranch
+
CUpti_ActivityBranch2
+
CUpti_ActivityInstructionExecution
+
CUpti_ActivityPCSampling
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityPCSampling3
+
CUpti_ActivitySharedAccess
+
CUpti_ActivityInstructionCorrelation
+
speed
+
CUpti_ActivityEnvironment
+
srcContextId
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
srcDeviceId
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
srcId
+
CUpti_ActivityUnifiedMemoryCounter2
+
srcKind
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
stallReason
+
CUpti_ActivityPCSampling
+
CUpti_ActivityPCSampling2
+
CUpti_ActivityPCSampling3
+
CUpti_PCSamplingPCData
+
stallReasonCount
+
CUpti_PCSamplingPCData
+
stallReasonData
+
CUpti_PCSamplingConfigurationInfo
+
stallReasonIndex
+
CUpti_PCSamplingGetStallReasonsParams
+
PcSamplingStallReasons
+
stallReasons
+
CUpti_PCSamplingGetStallReasonsParams
+
PcSamplingStallReasons
+
start
+
CUpti_ActivitySynchronization
+
CUpti_ActivityOpenAcc
+
CUpti_ActivityOpenAccData
+
CUpti_ActivityOpenAccLaunch
+
CUpti_ActivityOpenAccOther
+
CUpti_ActivityOpenMp
+
CUpti_ActivityJit
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityUnifiedMemoryCounter2
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel4
+
CUpti_ActivityMemset
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemset2
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemory
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityAPI
+
CUpti_ActivityOverhead
+
staticSharedMemory
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityKernel3
+
CUpti_ActivityKernel5
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel6
+
CUpti_ActivityKernel7
+
stream
+
CUpti_ResourceData
+
CUpti_SynchronizeData
+
streamId
+
CUpti_ActivityMemcpy3
+
CUpti_ActivityKernel3
+
CUpti_ActivityMemcpyPtoP2
+
CUpti_ActivityKernel7
+
CUpti_ActivityMemset2
+
CUpti_ActivityCudaEvent
+
CUpti_ActivityMemory3
+
CUpti_ActivityKernel
+
CUpti_ActivityKernel2
+
CUpti_ActivityStream
+
CUpti_ActivityGraphTrace
+
CUpti_ActivityMemcpyPtoP
+
CUpti_ActivitySynchronization
+
CUpti_ActivityKernel6
+
CUpti_ActivityMemset4
+
CUpti_ActivityMemcpy
+
CUpti_ActivityMemory2
+
CUpti_ActivityKernel4
+
CUpti_ActivityMemcpyPtoP3
+
CUpti_ActivityMemcpyPtoP4
+
CUpti_ActivityMemcpy4
+
CUpti_ActivityMemset
+
CUpti_ActivityMemset3
+
CUpti_ActivityMemcpy5
+
CUpti_ActivityKernel8
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityUnifiedMemoryCounter2
+
CUpti_ActivityKernel5
+
structSize
+
NV::Cupti::Checkpoint::CUpti_Checkpoint
+
CUpti_Profiler_DeInitialize_Params
+
CUpti_Profiler_CounterDataImage_Initialize_Params
+
CUpti_Profiler_SetConfig_Params
+
CUpti_Profiler_CounterDataImageOptions
+
CUpti_Profiler_CounterDataImage_CalculateSize_Params
+
CUpti_Profiler_UnsetConfig_Params
+
CUpti_Profiler_EnableProfiling_Params
+
CUpti_Profiler_FlushCounterData_Params
+
CUpti_Profiler_Initialize_Params
+
CUpti_Profiler_EndPass_Params
+
CUpti_Profiler_IsPassCollected_Params
+
CUpti_Profiler_BeginSession_Params
+
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
+
CUpti_Profiler_BeginPass_Params
+
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
+
CUpti_Profiler_DisableProfiling_Params
+
CUpti_Profiler_GetCounterAvailability_Params
+
CUpti_Profiler_EndSession_Params
+
CUpti_Profiler_DeviceSupported_Params
+
submitted
+
CUpti_ActivityKernel8
+
CUpti_ActivityKernel4
+
CUpti_ActivityKernel5
+
CUpti_ActivityKernel6
+
CUpti_ActivityCdpKernel
+
CUpti_ActivityKernel7
+
symbolName
+
CUpti_CallbackData
+
+
+ + + +
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/functions_0x62.html b/doc/Cupti/functions_0x62.html new file mode 100644 index 0000000000000000000000000000000000000000..0c0cd2f0154089c649c387ac22c0d7d302505e5b --- /dev/null +++ b/doc/Cupti/functions_0x62.html @@ -0,0 +1,131 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- b -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x63.html b/doc/Cupti/functions_0x63.html new file mode 100644 index 0000000000000000000000000000000000000000..2de1c6accb9a577de474d610b1430a806797ebf0 --- /dev/null +++ b/doc/Cupti/functions_0x63.html @@ -0,0 +1,333 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- c -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x64.html b/doc/Cupti/functions_0x64.html new file mode 100644 index 0000000000000000000000000000000000000000..401acd3cc58e986075a405d286c854626a4d690a --- /dev/null +++ b/doc/Cupti/functions_0x64.html @@ -0,0 +1,161 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- d -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x65.html b/doc/Cupti/functions_0x65.html new file mode 100644 index 0000000000000000000000000000000000000000..b3eee40b775c6b1a315459bbcb36c437294060c8 --- /dev/null +++ b/doc/Cupti/functions_0x65.html @@ -0,0 +1,145 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- e -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x66.html b/doc/Cupti/functions_0x66.html new file mode 100644 index 0000000000000000000000000000000000000000..3fbbe287efb885343d2e0b9d66dedc6c4262d822 --- /dev/null +++ b/doc/Cupti/functions_0x66.html @@ -0,0 +1,142 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- f -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x67.html b/doc/Cupti/functions_0x67.html new file mode 100644 index 0000000000000000000000000000000000000000..914d6f553de9ade1c0d0b03917dd386510508da1 --- /dev/null +++ b/doc/Cupti/functions_0x67.html @@ -0,0 +1,149 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- g -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x68.html b/doc/Cupti/functions_0x68.html new file mode 100644 index 0000000000000000000000000000000000000000..37a57bbc4cf384a6528d935f889e476aea592565 --- /dev/null +++ b/doc/Cupti/functions_0x68.html @@ -0,0 +1,71 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- h -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x69.html b/doc/Cupti/functions_0x69.html new file mode 100644 index 0000000000000000000000000000000000000000..036ac0d336cb7e7119261266adff100531406d78 --- /dev/null +++ b/doc/Cupti/functions_0x69.html @@ -0,0 +1,123 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- i -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x6a.html b/doc/Cupti/functions_0x6a.html new file mode 100644 index 0000000000000000000000000000000000000000..3528e392acd053b0dcf013c1548390e5b521b442 --- /dev/null +++ b/doc/Cupti/functions_0x6a.html @@ -0,0 +1,71 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- j -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x6b.html b/doc/Cupti/functions_0x6b.html new file mode 100644 index 0000000000000000000000000000000000000000..388aa84076aec17a11d115dcf23e36b75b98b80e --- /dev/null +++ b/doc/Cupti/functions_0x6b.html @@ -0,0 +1,149 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- k -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x6c.html b/doc/Cupti/functions_0x6c.html new file mode 100644 index 0000000000000000000000000000000000000000..0490fbdcbcc2902afdd7dd9bedf56e0c8bf73d98 --- /dev/null +++ b/doc/Cupti/functions_0x6c.html @@ -0,0 +1,112 @@ + + +Cupti: Data Fields + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x6d.html b/doc/Cupti/functions_0x6d.html new file mode 100644 index 0000000000000000000000000000000000000000..7edb493cb23dd8165ba6c599ea5b18776c62e1cb --- /dev/null +++ b/doc/Cupti/functions_0x6d.html @@ -0,0 +1,179 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- m -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x6e.html b/doc/Cupti/functions_0x6e.html new file mode 100644 index 0000000000000000000000000000000000000000..8ab242315568f938b16c64c54707a5eb948770da --- /dev/null +++ b/doc/Cupti/functions_0x6e.html @@ -0,0 +1,150 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- n -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x6f.html b/doc/Cupti/functions_0x6f.html new file mode 100644 index 0000000000000000000000000000000000000000..6a3018e556256160f408563e933c7d74ac74ba67 --- /dev/null +++ b/doc/Cupti/functions_0x6f.html @@ -0,0 +1,87 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- o -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x70.html b/doc/Cupti/functions_0x70.html new file mode 100644 index 0000000000000000000000000000000000000000..3ef03452505c0332893410d8d272ad2f2b48bff5 --- /dev/null +++ b/doc/Cupti/functions_0x70.html @@ -0,0 +1,282 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- p -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x71.html b/doc/Cupti/functions_0x71.html new file mode 100644 index 0000000000000000000000000000000000000000..b2fa6867b15387394e9bb06118b4b3c87bff397a --- /dev/null +++ b/doc/Cupti/functions_0x71.html @@ -0,0 +1,72 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- q -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x72.html b/doc/Cupti/functions_0x72.html new file mode 100644 index 0000000000000000000000000000000000000000..e8997c2cc84adf0323325da02044f32c678e78bf --- /dev/null +++ b/doc/Cupti/functions_0x72.html @@ -0,0 +1,138 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- r -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x73.html b/doc/Cupti/functions_0x73.html new file mode 100644 index 0000000000000000000000000000000000000000..b493ee0c0ef2c91b126b910a053a1d42cc0a3eb2 --- /dev/null +++ b/doc/Cupti/functions_0x73.html @@ -0,0 +1,303 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- s -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x74.html b/doc/Cupti/functions_0x74.html new file mode 100644 index 0000000000000000000000000000000000000000..ec6ed3c339deaf962514505eae12f2c0772cb88d --- /dev/null +++ b/doc/Cupti/functions_0x74.html @@ -0,0 +1,123 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- t -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x75.html b/doc/Cupti/functions_0x75.html new file mode 100644 index 0000000000000000000000000000000000000000..6576f3a2c0ee73b8af4494edbe1ca69d5c9d5538 --- /dev/null +++ b/doc/Cupti/functions_0x75.html @@ -0,0 +1,76 @@ + + +Cupti: Data Fields + + + + + +
+Here is a list of all documented struct and union fields with links to the struct/union documentation for each field: +

+

- u -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_0x76.html b/doc/Cupti/functions_0x76.html new file mode 100644 index 0000000000000000000000000000000000000000..44afc13a904b5b3fb8a620fe7d00486f834ab09c --- /dev/null +++ b/doc/Cupti/functions_0x76.html @@ -0,0 +1,89 @@ + + +Cupti: Data Fields + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars.html b/doc/Cupti/functions_vars.html new file mode 100644 index 0000000000000000000000000000000000000000..9edb5927f1ddd3ed61613ff6bf23f6b3470d1650 --- /dev/null +++ b/doc/Cupti/functions_vars.html @@ -0,0 +1,89 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x62.html b/doc/Cupti/functions_vars_0x62.html new file mode 100644 index 0000000000000000000000000000000000000000..f230f9679129a66a335033d2c05ea5d11ec74da2 --- /dev/null +++ b/doc/Cupti/functions_vars_0x62.html @@ -0,0 +1,131 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- b -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x63.html b/doc/Cupti/functions_vars_0x63.html new file mode 100644 index 0000000000000000000000000000000000000000..1909ae85911df96e229ce9e932df5b63fdcaf4c9 --- /dev/null +++ b/doc/Cupti/functions_vars_0x63.html @@ -0,0 +1,333 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- c -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x64.html b/doc/Cupti/functions_vars_0x64.html new file mode 100644 index 0000000000000000000000000000000000000000..7aa3885a40992a655570ca36cefe7bc5421abb89 --- /dev/null +++ b/doc/Cupti/functions_vars_0x64.html @@ -0,0 +1,161 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- d -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x65.html b/doc/Cupti/functions_vars_0x65.html new file mode 100644 index 0000000000000000000000000000000000000000..3958d0c5d2db97b7dbb2c0f82d1a711a8675f6e5 --- /dev/null +++ b/doc/Cupti/functions_vars_0x65.html @@ -0,0 +1,145 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- e -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x66.html b/doc/Cupti/functions_vars_0x66.html new file mode 100644 index 0000000000000000000000000000000000000000..c51e5331e70e95bdf3e5c3d0f04e6e5a56821dec --- /dev/null +++ b/doc/Cupti/functions_vars_0x66.html @@ -0,0 +1,142 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- f -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x67.html b/doc/Cupti/functions_vars_0x67.html new file mode 100644 index 0000000000000000000000000000000000000000..aef5bb521ce3ad1c2f223dc2ec0f7c73c7fe9e81 --- /dev/null +++ b/doc/Cupti/functions_vars_0x67.html @@ -0,0 +1,149 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- g -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x68.html b/doc/Cupti/functions_vars_0x68.html new file mode 100644 index 0000000000000000000000000000000000000000..2fbeeabd5d6fe982d69e6776ae1b7f019a80294b --- /dev/null +++ b/doc/Cupti/functions_vars_0x68.html @@ -0,0 +1,71 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x69.html b/doc/Cupti/functions_vars_0x69.html new file mode 100644 index 0000000000000000000000000000000000000000..163212eb5711eaaced3f7a290a13105dd1ec95b9 --- /dev/null +++ b/doc/Cupti/functions_vars_0x69.html @@ -0,0 +1,123 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x6a.html b/doc/Cupti/functions_vars_0x6a.html new file mode 100644 index 0000000000000000000000000000000000000000..c6e547254c60d9163c364cbf0c31f5fc7849a082 --- /dev/null +++ b/doc/Cupti/functions_vars_0x6a.html @@ -0,0 +1,71 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- j -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x6b.html b/doc/Cupti/functions_vars_0x6b.html new file mode 100644 index 0000000000000000000000000000000000000000..6a929170b9a902b2828a4be8264924e6ac017981 --- /dev/null +++ b/doc/Cupti/functions_vars_0x6b.html @@ -0,0 +1,149 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- k -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x6c.html b/doc/Cupti/functions_vars_0x6c.html new file mode 100644 index 0000000000000000000000000000000000000000..d4d1971d6b1c476c566f92cff58edcb3cb924d2e --- /dev/null +++ b/doc/Cupti/functions_vars_0x6c.html @@ -0,0 +1,112 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x6d.html b/doc/Cupti/functions_vars_0x6d.html new file mode 100644 index 0000000000000000000000000000000000000000..9704d06a4b96247c12f7b41c20d2a19675364e82 --- /dev/null +++ b/doc/Cupti/functions_vars_0x6d.html @@ -0,0 +1,179 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- m -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x6e.html b/doc/Cupti/functions_vars_0x6e.html new file mode 100644 index 0000000000000000000000000000000000000000..4b4c543c54cd0e0b7e83f14b979a9218b73b8d2f --- /dev/null +++ b/doc/Cupti/functions_vars_0x6e.html @@ -0,0 +1,150 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- n -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x6f.html b/doc/Cupti/functions_vars_0x6f.html new file mode 100644 index 0000000000000000000000000000000000000000..f36d0c44cf5ba6b2a622dd87d03b6603158679c9 --- /dev/null +++ b/doc/Cupti/functions_vars_0x6f.html @@ -0,0 +1,87 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x70.html b/doc/Cupti/functions_vars_0x70.html new file mode 100644 index 0000000000000000000000000000000000000000..b3dddd31c8a153a52033a80bbe875836c0294241 --- /dev/null +++ b/doc/Cupti/functions_vars_0x70.html @@ -0,0 +1,282 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- p -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x71.html b/doc/Cupti/functions_vars_0x71.html new file mode 100644 index 0000000000000000000000000000000000000000..06162fd5a3cb1f9a3bf97f302fa0041914feab0b --- /dev/null +++ b/doc/Cupti/functions_vars_0x71.html @@ -0,0 +1,72 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x72.html b/doc/Cupti/functions_vars_0x72.html new file mode 100644 index 0000000000000000000000000000000000000000..2190d57367b215456273592b23b53b4ab6f83c28 --- /dev/null +++ b/doc/Cupti/functions_vars_0x72.html @@ -0,0 +1,138 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- r -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x73.html b/doc/Cupti/functions_vars_0x73.html new file mode 100644 index 0000000000000000000000000000000000000000..589acdd2416719aa0712fdb276f826ac5c12945b --- /dev/null +++ b/doc/Cupti/functions_vars_0x73.html @@ -0,0 +1,303 @@ + + +Cupti: Data Fields - Variables + + + + + +
+  +

+

- s -

+
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x74.html b/doc/Cupti/functions_vars_0x74.html new file mode 100644 index 0000000000000000000000000000000000000000..b712585037a62e12af433a8d8b4c3d5efef1b71d --- /dev/null +++ b/doc/Cupti/functions_vars_0x74.html @@ -0,0 +1,123 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x75.html b/doc/Cupti/functions_vars_0x75.html new file mode 100644 index 0000000000000000000000000000000000000000..2a2f4cf8e81202a4238ce1f15bcf608b544d2657 --- /dev/null +++ b/doc/Cupti/functions_vars_0x75.html @@ -0,0 +1,76 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/functions_vars_0x76.html b/doc/Cupti/functions_vars_0x76.html new file mode 100644 index 0000000000000000000000000000000000000000..af115a59346525d3269131906c16dfb9f5132d2f --- /dev/null +++ b/doc/Cupti/functions_vars_0x76.html @@ -0,0 +1,89 @@ + + +Cupti: Data Fields - Variables + + + + + + +
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__ACTIVITY__API.html b/doc/Cupti/group__CUPTI__ACTIVITY__API.html new file mode 100644 index 0000000000000000000000000000000000000000..d4ee08c1d4d80506a32b070de0f1d6c443aac3fa --- /dev/null +++ b/doc/Cupti/group__CUPTI__ACTIVITY__API.html @@ -0,0 +1,4056 @@ + + +Cupti: CUPTI Activity API + + + + + +
+

CUPTI Activity API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  CUpti_Activity
 The base activity record. More...
struct  CUpti_ActivityAPI
 The activity record for a driver or runtime API invocation. More...
struct  CUpti_ActivityAutoBoostState
 Device auto boost state structure. More...
struct  CUpti_ActivityBranch
 The activity record for source level result branch. (deprecated). More...
struct  CUpti_ActivityBranch2
 The activity record for source level result branch. More...
struct  CUpti_ActivityCdpKernel
 The activity record for CDP (CUDA Dynamic Parallelism) kernel. More...
struct  CUpti_ActivityContext
 The activity record for a context. More...
struct  CUpti_ActivityCudaEvent
 The activity record for CUDA event. More...
struct  CUpti_ActivityDevice
 The activity record for a device. (deprecated). More...
struct  CUpti_ActivityDevice2
 The activity record for a device. (deprecated). More...
struct  CUpti_ActivityDevice3
 The activity record for a device. (CUDA 7.0 onwards). More...
struct  CUpti_ActivityDevice4
 The activity record for a device. (CUDA 11.6 onwards). More...
struct  CUpti_ActivityDeviceAttribute
 The activity record for a device attribute. More...
struct  CUpti_ActivityEnvironment
 The activity record for CUPTI environmental data. More...
struct  CUpti_ActivityEvent
 The activity record for a CUPTI event. More...
struct  CUpti_ActivityEventInstance
 The activity record for a CUPTI event with instance information. More...
struct  CUpti_ActivityExternalCorrelation
 The activity record for correlation with external records. More...
struct  CUpti_ActivityFunction
 The activity record for global/device functions. More...
struct  CUpti_ActivityGlobalAccess
 The activity record for source-level global access. (deprecated). More...
struct  CUpti_ActivityGlobalAccess2
 The activity record for source-level global access. (deprecated in CUDA 9.0). More...
struct  CUpti_ActivityGlobalAccess3
 The activity record for source-level global access. More...
struct  CUpti_ActivityGraphTrace
 The activity record for trace of graph execution. More...
struct  CUpti_ActivityInstantaneousEvent
 The activity record for an instantaneous CUPTI event. More...
struct  CUpti_ActivityInstantaneousEventInstance
 The activity record for an instantaneous CUPTI event with event domain instance information. More...
struct  CUpti_ActivityInstantaneousMetric
 The activity record for an instantaneous CUPTI metric. More...
struct  CUpti_ActivityInstantaneousMetricInstance
 The instantaneous activity record for a CUPTI metric with instance information. More...
struct  CUpti_ActivityInstructionCorrelation
 The activity record for source-level sass/source line-by-line correlation. More...
struct  CUpti_ActivityInstructionExecution
 The activity record for source-level instruction execution. More...
struct  CUpti_ActivityJit
 The activity record for JIT operations. This activity represents the JIT operations (compile, load, store) of a CUmodule from the Compute Cache. Gives the exact hashed path of where the cached module is loaded from, or where the module will be stored after Just-In-Time (JIT) compilation. More...
struct  CUpti_ActivityKernel
 The activity record for kernel. (deprecated). More...
struct  CUpti_ActivityKernel2
 The activity record for kernel. (deprecated). More...
struct  CUpti_ActivityKernel3
 The activity record for a kernel (CUDA 6.5(with sm_52 support) onwards). (deprecated in CUDA 9.0). More...
struct  CUpti_ActivityKernel4
 The activity record for a kernel (CUDA 9.0(with sm_70 support) onwards). (deprecated in CUDA 11.0). More...
struct  CUpti_ActivityKernel5
 The activity record for a kernel (CUDA 11.0(with sm_80 support) onwards). (deprecated in CUDA 11.2) This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. More...
struct  CUpti_ActivityKernel6
 The activity record for kernel. (deprecated in CUDA 11.6). More...
struct  CUpti_ActivityKernel7
 The activity record for kernel. (deprecated in CUDA 11.8). More...
struct  CUpti_ActivityKernel8
 The activity record for kernel. More...
struct  CUpti_ActivityMarker
 The activity record providing a marker which is an instantaneous point in time. (deprecated in CUDA 8.0). More...
struct  CUpti_ActivityMarker2
 The activity record providing a marker which is an instantaneous point in time. More...
struct  CUpti_ActivityMarkerData
 The activity record providing detailed information for a marker. More...
struct  CUpti_ActivityMemcpy
 The activity record for memory copies. (deprecated). More...
struct  CUpti_ActivityMemcpy3
 The activity record for memory copies. (deprecated in CUDA 11.1). More...
struct  CUpti_ActivityMemcpy4
 The activity record for memory copies. (deprecated in CUDA 11.6). More...
struct  CUpti_ActivityMemcpy5
 The activity record for memory copies. More...
struct  CUpti_ActivityMemcpyPtoP
 The activity record for peer-to-peer memory copies. More...
struct  CUpti_ActivityMemcpyPtoP2
 The activity record for peer-to-peer memory copies. (deprecated in CUDA 11.1). More...
struct  CUpti_ActivityMemcpyPtoP3
 The activity record for peer-to-peer memory copies. (deprecated in CUDA 11.6). More...
struct  CUpti_ActivityMemcpyPtoP4
 The activity record for peer-to-peer memory copies. More...
struct  CUpti_ActivityMemory
 The activity record for memory. More...
struct  CUpti_ActivityMemory2
 The activity record for memory. More...
struct  CUpti_ActivityMemory3
 The activity record for memory. More...
struct  CUpti_ActivityMemoryPool
 The activity record for memory pool. More...
struct  CUpti_ActivityMemoryPool2
 The activity record for memory pool. More...
struct  CUpti_ActivityMemset
 The activity record for memset. (deprecated). More...
struct  CUpti_ActivityMemset2
 The activity record for memset. (deprecated in CUDA 11.1). More...
struct  CUpti_ActivityMemset3
 The activity record for memset. (deprecated in CUDA 11.6). More...
struct  CUpti_ActivityMemset4
 The activity record for memset. More...
struct  CUpti_ActivityMetric
 The activity record for a CUPTI metric. More...
struct  CUpti_ActivityMetricInstance
 The activity record for a CUPTI metric with instance information. More...
struct  CUpti_ActivityModule
 The activity record for a CUDA module. More...
struct  CUpti_ActivityName
 The activity record providing a name. More...
struct  CUpti_ActivityNvLink
 NVLink information. (deprecated in CUDA 9.0). More...
struct  CUpti_ActivityNvLink2
 NVLink information. (deprecated in CUDA 10.0). More...
struct  CUpti_ActivityNvLink3
 NVLink information. More...
struct  CUpti_ActivityNvLink4
 NVLink information. More...
union  CUpti_ActivityObjectKindId
 Identifiers for object kinds as specified by CUpti_ActivityObjectKind. More...
struct  CUpti_ActivityOpenAcc
 The base activity record for OpenAcc records. More...
struct  CUpti_ActivityOpenAccData
 The activity record for OpenACC data. More...
struct  CUpti_ActivityOpenAccLaunch
 The activity record for OpenACC launch. More...
struct  CUpti_ActivityOpenAccOther
 The activity record for OpenACC other. More...
struct  CUpti_ActivityOpenMp
 The base activity record for OpenMp records. More...
struct  CUpti_ActivityOverhead
 The activity record for CUPTI and driver overheads. More...
struct  CUpti_ActivityPcie
 PCI devices information required to construct topology. More...
struct  CUpti_ActivityPCSampling
 The activity record for PC sampling. (deprecated in CUDA 8.0). More...
struct  CUpti_ActivityPCSampling2
 The activity record for PC sampling. (deprecated in CUDA 9.0). More...
struct  CUpti_ActivityPCSampling3
 The activity record for PC sampling. More...
struct  CUpti_ActivityPCSamplingConfig
 PC sampling configuration structure. More...
struct  CUpti_ActivityPCSamplingRecordInfo
 The activity record for record status for PC sampling. More...
struct  CUpti_ActivityPreemption
 The activity record for a preemption of a CDP kernel. More...
struct  CUpti_ActivitySharedAccess
 The activity record for source-level shared access. More...
struct  CUpti_ActivitySourceLocator
 The activity record for source locator. More...
struct  CUpti_ActivityStream
 The activity record for CUDA stream. More...
struct  CUpti_ActivitySynchronization
 The activity record for synchronization management. More...
struct  CUpti_ActivityUnifiedMemoryCounter
 The activity record for Unified Memory counters (deprecated in CUDA 7.0). More...
struct  CUpti_ActivityUnifiedMemoryCounter2
 The activity record for Unified Memory counters (CUDA 7.0 and beyond). More...
struct  CUpti_ActivityUnifiedMemoryCounterConfig
 Unified Memory counters configuration structure. More...

Defines

#define CUPTI_AUTO_BOOST_INVALID_CLIENT_PID   0
#define CUPTI_CORRELATION_ID_UNKNOWN   0
#define CUPTI_FUNCTION_INDEX_ID_INVALID   0
#define CUPTI_GRID_ID_UNKNOWN   0LL
#define CUPTI_MAX_NVLINK_PORTS   32
#define CUPTI_NVLINK_INVALID_PORT   -1
#define CUPTI_SOURCE_LOCATOR_ID_UNKNOWN   0
#define CUPTI_SYNCHRONIZATION_INVALID_VALUE   -1
#define CUPTI_TIMESTAMP_UNKNOWN   0LL

Typedefs

typedef void(* CUpti_BuffersCallbackCompleteFunc )(CUcontext context, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
 Function type for callback used by CUPTI to return a buffer of activity records.
typedef void(* CUpti_BuffersCallbackRequestFunc )(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
 Function type for callback used by CUPTI to request an empty buffer for storing activity records.
typedef uint64_t(* CUpti_TimestampCallbackFunc )(void)
 Function type for callback used by CUPTI to request a timestamp to be used in activity records.

Enumerations

enum  CUpti_ActivityAttribute {
+  CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE = 0, +
+  CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP = 1, +
+  CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT = 2, +
+  CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE = 3, +
+  CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT = 4, +
+  CUPTI_ACTIVITY_ATTR_ZEROED_OUT_ACTIVITY_BUFFER = 5, +
+  CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE = 6, +
+  CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE = 7, +
+  CUPTI_ACTIVITY_ATTR_MEM_ALLOCATION_TYPE_HOST_PINNED = 8 +
+ }
 Activity attributes. More...
enum  CUpti_ActivityComputeApiKind {
+  CUPTI_ACTIVITY_COMPUTE_API_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_COMPUTE_API_CUDA = 1, +
+  CUPTI_ACTIVITY_COMPUTE_API_CUDA_MPS = 2 +
+ }
 The kind of a compute API. More...
enum  CUpti_ActivityEnvironmentKind {
+  CUPTI_ACTIVITY_ENVIRONMENT_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_ENVIRONMENT_SPEED = 1, +
+  CUPTI_ACTIVITY_ENVIRONMENT_TEMPERATURE = 2, +
+  CUPTI_ACTIVITY_ENVIRONMENT_POWER = 3, +
+  CUPTI_ACTIVITY_ENVIRONMENT_COOLING = 4 +
+ }
 The kind of environment data. Used to indicate what type of data is being reported by an environment activity record. More...
enum  CUpti_ActivityFlag {
+  CUPTI_ACTIVITY_FLAG_NONE = 0, +
+  CUPTI_ACTIVITY_FLAG_DEVICE_CONCURRENT_KERNELS = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_MEMCPY_ASYNC = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_MARKER_START = 1 << 1, +
+  CUPTI_ACTIVITY_FLAG_MARKER_END = 1 << 2, +
+  CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE = 1 << 3, +
+  CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_SUCCESS = 1 << 4, +
+  CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_FAILED = 1 << 5, +
+  CUPTI_ACTIVITY_FLAG_MARKER_SYNC_RELEASE = 1 << 6, +
+  CUPTI_ACTIVITY_FLAG_MARKER_COLOR_NONE = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_MARKER_COLOR_ARGB = 1 << 1, +
+  CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_SIZE_MASK = 0xFF << 0, +
+  CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_LOAD = 1 << 8, +
+  CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_CACHED = 1 << 9, +
+  CUPTI_ACTIVITY_FLAG_METRIC_OVERFLOWED = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_METRIC_VALUE_INVALID = 1 << 1, +
+  CUPTI_ACTIVITY_FLAG_INSTRUCTION_VALUE_INVALID = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_INSTRUCTION_CLASS_MASK = 0xFF << 1, +
+  CUPTI_ACTIVITY_FLAG_FLUSH_FORCED = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_SHARED_ACCESS_KIND_SIZE_MASK = 0xFF << 0, +
+  CUPTI_ACTIVITY_FLAG_SHARED_ACCESS_KIND_LOAD = 1 << 8, +
+  CUPTI_ACTIVITY_FLAG_MEMSET_ASYNC = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_THRASHING_IN_CPU = 1 << 0, +
+  CUPTI_ACTIVITY_FLAG_THROTTLING_IN_CPU = 1 << 0 +
+ }
 Flags associated with activity records. More...
enum  CUpti_ActivityInstructionClass {
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_32 = 1, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_64 = 2, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_INTEGER = 3, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_BIT_CONVERSION = 4, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_CONTROL_FLOW = 5, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_GLOBAL = 6, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_SHARED = 7, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_LOCAL = 8, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_GENERIC = 9, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_SURFACE = 10, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_CONSTANT = 11, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_TEXTURE = 12, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_GLOBAL_ATOMIC = 13, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_SHARED_ATOMIC = 14, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_SURFACE_ATOMIC = 15, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_INTER_THREAD_COMMUNICATION = 16, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_BARRIER = 17, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_MISCELLANEOUS = 18, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_16 = 19, +
+  CUPTI_ACTIVITY_INSTRUCTION_CLASS_UNIFORM = 20 +
+ }
 SASS instruction classification. More...
enum  CUpti_ActivityJitEntryType { ,
+  CUPTI_ACTIVITY_JIT_ENTRY_PTX_TO_CUBIN = 1, +
+  CUPTI_ACTIVITY_JIT_ENTRY_NVVM_IR_TO_PTX = 2 +
+ }
 The types of JIT entry. More...
enum  CUpti_ActivityJitOperationType { ,
+  CUPTI_ACTIVITY_JIT_OPERATION_CACHE_LOAD = 1, +
+  CUPTI_ACTIVITY_JIT_OPERATION_CACHE_STORE = 2, +
+  CUPTI_ACTIVITY_JIT_OPERATION_COMPILE = 3 +
+ }
 The types of JIT compilation operations. More...
enum  CUpti_ActivityKind {
+  CUPTI_ACTIVITY_KIND_INVALID = 0, +
+  CUPTI_ACTIVITY_KIND_MEMCPY = 1, +
+  CUPTI_ACTIVITY_KIND_MEMSET = 2, +
+  CUPTI_ACTIVITY_KIND_KERNEL = 3, +
+  CUPTI_ACTIVITY_KIND_DRIVER = 4, +
+  CUPTI_ACTIVITY_KIND_RUNTIME = 5, +
+  CUPTI_ACTIVITY_KIND_EVENT = 6, +
+  CUPTI_ACTIVITY_KIND_METRIC = 7, +
+  CUPTI_ACTIVITY_KIND_DEVICE = 8, +
+  CUPTI_ACTIVITY_KIND_CONTEXT = 9, +
+  CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL = 10, +
+  CUPTI_ACTIVITY_KIND_NAME = 11, +
+  CUPTI_ACTIVITY_KIND_MARKER = 12, +
+  CUPTI_ACTIVITY_KIND_MARKER_DATA = 13, +
+  CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR = 14, +
+  CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS = 15, +
+  CUPTI_ACTIVITY_KIND_BRANCH = 16, +
+  CUPTI_ACTIVITY_KIND_OVERHEAD = 17, +
+  CUPTI_ACTIVITY_KIND_CDP_KERNEL = 18, +
+  CUPTI_ACTIVITY_KIND_PREEMPTION = 19, +
+  CUPTI_ACTIVITY_KIND_ENVIRONMENT = 20, +
+  CUPTI_ACTIVITY_KIND_EVENT_INSTANCE = 21, +
+  CUPTI_ACTIVITY_KIND_MEMCPY2 = 22, +
+  CUPTI_ACTIVITY_KIND_METRIC_INSTANCE = 23, +
+  CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION = 24, +
+  CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER = 25, +
+  CUPTI_ACTIVITY_KIND_FUNCTION = 26, +
+  CUPTI_ACTIVITY_KIND_MODULE = 27, +
+  CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE = 28, +
+  CUPTI_ACTIVITY_KIND_SHARED_ACCESS = 29, +
+  CUPTI_ACTIVITY_KIND_PC_SAMPLING = 30, +
+  CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO = 31, +
+  CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION = 32, +
+  CUPTI_ACTIVITY_KIND_OPENACC_DATA = 33, +
+  CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH = 34, +
+  CUPTI_ACTIVITY_KIND_OPENACC_OTHER = 35, +
+  CUPTI_ACTIVITY_KIND_CUDA_EVENT = 36, +
+  CUPTI_ACTIVITY_KIND_STREAM = 37, +
+  CUPTI_ACTIVITY_KIND_SYNCHRONIZATION = 38, +
+  CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION = 39, +
+  CUPTI_ACTIVITY_KIND_NVLINK = 40, +
+  CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT = 41, +
+  CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE = 42, +
+  CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC = 43, +
+  CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE = 44, +
+  CUPTI_ACTIVITY_KIND_MEMORY = 45, +
+  CUPTI_ACTIVITY_KIND_PCIE = 46, +
+  CUPTI_ACTIVITY_KIND_OPENMP = 47, +
+  CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API = 48, +
+  CUPTI_ACTIVITY_KIND_MEMORY2 = 49, +
+  CUPTI_ACTIVITY_KIND_MEMORY_POOL = 50, +
+  CUPTI_ACTIVITY_KIND_GRAPH_TRACE = 51, +
+  CUPTI_ACTIVITY_KIND_JIT = 52 +
+ }
 The kinds of activity records. More...
enum  CUpti_ActivityLaunchType {
+  CUPTI_ACTIVITY_LAUNCH_TYPE_REGULAR = 0, +
+  CUPTI_ACTIVITY_LAUNCH_TYPE_COOPERATIVE_SINGLE_DEVICE = 1, +
+  CUPTI_ACTIVITY_LAUNCH_TYPE_COOPERATIVE_MULTI_DEVICE = 2 +
+ }
 The type of the CUDA kernel launch. More...
enum  CUpti_ActivityMemcpyKind {
+  CUPTI_ACTIVITY_MEMCPY_KIND_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_HTOD = 1, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_DTOH = 2, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_HTOA = 3, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_ATOH = 4, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_ATOA = 5, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_ATOD = 6, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_DTOA = 7, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_DTOD = 8, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_HTOH = 9, +
+  CUPTI_ACTIVITY_MEMCPY_KIND_PTOP = 10 +
+ }
 The kind of a memory copy, indicating the source and destination targets of the copy. More...
enum  CUpti_ActivityMemoryKind {
+  CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_MEMORY_KIND_PAGEABLE = 1, +
+  CUPTI_ACTIVITY_MEMORY_KIND_PINNED = 2, +
+  CUPTI_ACTIVITY_MEMORY_KIND_DEVICE = 3, +
+  CUPTI_ACTIVITY_MEMORY_KIND_ARRAY = 4, +
+  CUPTI_ACTIVITY_MEMORY_KIND_MANAGED = 5, +
+  CUPTI_ACTIVITY_MEMORY_KIND_DEVICE_STATIC = 6, +
+  CUPTI_ACTIVITY_MEMORY_KIND_MANAGED_STATIC = 7 +
+ }
 The kinds of memory accessed by a memory operation/copy. More...
enum  CUpti_ActivityMemoryOperationType { ,
+  CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_ALLOCATION = 1, +
+  CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_RELEASE = 2 +
+ }
 Memory operation types. More...
enum  CUpti_ActivityMemoryPoolOperationType { ,
+  CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_CREATED = 1, +
+  CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_DESTROYED = 2, +
+  CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED = 3 +
+ }
 Memory pool operation types. More...
enum  CUpti_ActivityMemoryPoolType { ,
+  CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL = 1, +
+  CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED = 2 +
+ }
 Memory pool types. More...
enum  CUpti_ActivityObjectKind {
+  CUPTI_ACTIVITY_OBJECT_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_OBJECT_PROCESS = 1, +
+  CUPTI_ACTIVITY_OBJECT_THREAD = 2, +
+  CUPTI_ACTIVITY_OBJECT_DEVICE = 3, +
+  CUPTI_ACTIVITY_OBJECT_CONTEXT = 4, +
+  CUPTI_ACTIVITY_OBJECT_STREAM = 5 +
+ }
 The kinds of activity objects. More...
enum  CUpti_ActivityOverheadKind {
+  CUPTI_ACTIVITY_OVERHEAD_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_OVERHEAD_DRIVER_COMPILER = 1, +
+  CUPTI_ACTIVITY_OVERHEAD_CUPTI_BUFFER_FLUSH = 1<<16, +
+  CUPTI_ACTIVITY_OVERHEAD_CUPTI_INSTRUMENTATION = 2<<16, +
+  CUPTI_ACTIVITY_OVERHEAD_CUPTI_RESOURCE = 3<<16 +
+ }
 The kinds of activity overhead. More...
enum  CUpti_ActivityPartitionedGlobalCacheConfig {
+  CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_NOT_SUPPORTED = 1, +
+  CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_OFF = 2, +
+  CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_ON = 3 +
+ }
 Partitioned global caching option. More...
enum  CUpti_ActivityPCSamplingPeriod {
+  CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_INVALID = 0, +
+  CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MIN = 1, +
+  CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_LOW = 2, +
+  CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MID = 3, +
+  CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_HIGH = 4, +
+  CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MAX = 5 +
+ }
 Sampling period for PC sampling method. More...
enum  CUpti_ActivityPCSamplingStallReason {
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID = 0, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE = 1, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH = 2, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY = 3, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY = 4, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE = 5, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC = 6, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY = 7, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY = 8, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE = 9, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED = 10, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER = 11, +
+  CUPTI_ACTIVITY_PC_SAMPLING_STALL_SLEEPING = 12 +
+ }
 The stall reason for PC sampling activity. More...
enum  CUpti_ActivityPreemptionKind {
+  CUPTI_ACTIVITY_PREEMPTION_KIND_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_PREEMPTION_KIND_SAVE = 1, +
+  CUPTI_ACTIVITY_PREEMPTION_KIND_RESTORE = 2 +
+ }
 The kind of a preemption activity. More...
enum  CUpti_ActivityStreamFlag {
+  CUPTI_ACTIVITY_STREAM_CREATE_FLAG_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_STREAM_CREATE_FLAG_DEFAULT = 1, +
+  CUPTI_ACTIVITY_STREAM_CREATE_FLAG_NON_BLOCKING = 2, +
+  CUPTI_ACTIVITY_STREAM_CREATE_FLAG_NULL = 3, +
+  CUPTI_ACTIVITY_STREAM_CREATE_MASK = 0xFFFF +
+ }
 stream type. More...
enum  CUpti_ActivitySynchronizationType {
+  CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_EVENT_SYNCHRONIZE = 1, +
+  CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_WAIT_EVENT = 2, +
+  CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_SYNCHRONIZE = 3, +
+  CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_CONTEXT_SYNCHRONIZE = 4 +
+ }
 Synchronization type. More...
enum  CUpti_ActivityThreadIdType {
+  CUPTI_ACTIVITY_THREAD_ID_TYPE_DEFAULT = 0, +
+  CUPTI_ACTIVITY_THREAD_ID_TYPE_SYSTEM = 1 +
+ }
 Thread-Id types. More...
enum  CUpti_ActivityUnifiedMemoryAccessType {
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_READ = 1, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_WRITE = 2, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_ATOMIC = 3, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_PREFETCH = 4 +
+ }
 Memory access type for unified memory page faults. More...
enum  CUpti_ActivityUnifiedMemoryCounterKind {
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD = 1, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH = 2, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT = 3, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT = 4, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING = 5, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING = 6, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP = 7, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOD = 8 +
+ }
 Kind of the Unified Memory counter. More...
enum  CUpti_ActivityUnifiedMemoryCounterScope {
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE = 1, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_ALL_DEVICES = 2 +
+ }
 Scope of the unified memory counter (deprecated in CUDA 7.0). More...
enum  CUpti_ActivityUnifiedMemoryMigrationCause {
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_USER = 1, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_COHERENCE = 2, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_PREFETCH = 3, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_EVICTION = 4, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_ACCESS_COUNTERS = 5 +
+ }
 Migration cause of the Unified Memory counter. More...
enum  CUpti_ActivityUnifiedMemoryRemoteMapCause {
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_UNKNOWN = 0, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_COHERENCE = 1, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_THRASHING = 2, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_POLICY = 3, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_OUT_OF_MEMORY = 4, +
+  CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_EVICTION = 5 +
+ }
 Remote memory map cause of the Unified Memory counter. More...
enum  CUpti_DeviceVirtualizationMode {
+  CUPTI_DEVICE_VIRTUALIZATION_MODE_NONE = 0, +
+  CUPTI_DEVICE_VIRTUALIZATION_MODE_PASS_THROUGH = 1, +
+  CUPTI_DEVICE_VIRTUALIZATION_MODE_VIRTUAL_GPU = 2 +
+ }
enum  CUpti_DevType { ,
+  CUPTI_DEV_TYPE_GPU = 1, +
+  CUPTI_DEV_TYPE_NPU = 2 +
+ }
 The device type for device connected to NVLink. More...
enum  CUpti_EnvironmentClocksThrottleReason {
+  CUPTI_CLOCKS_THROTTLE_REASON_GPU_IDLE = 0x00000001, +
+  CUPTI_CLOCKS_THROTTLE_REASON_USER_DEFINED_CLOCKS = 0x00000002, +
+  CUPTI_CLOCKS_THROTTLE_REASON_SW_POWER_CAP = 0x00000004, +
+  CUPTI_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN = 0x00000008, +
+  CUPTI_CLOCKS_THROTTLE_REASON_UNKNOWN = 0x80000000, +
+  CUPTI_CLOCKS_THROTTLE_REASON_UNSUPPORTED = 0x40000000, +
+  CUPTI_CLOCKS_THROTTLE_REASON_NONE = 0x00000000 +
+ }
 Reasons for clock throttling. More...
enum  CUpti_ExternalCorrelationKind { ,
+  CUPTI_EXTERNAL_CORRELATION_KIND_UNKNOWN = 1, +
+  CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC = 2, +
+  CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0 = 3, +
+  CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1 = 4, +
+  CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM2 = 5, +
+  CUPTI_EXTERNAL_CORRELATION_KIND_SIZE +
+ }
 The kind of external APIs supported for correlation. More...
enum  CUpti_FuncShmemLimitConfig
 The shared memory limit per block config for a kernel. This should be used to set the 'cudaOccFuncShmemConfig' field in the occupancy calculator API.
enum  CUpti_LinkFlag { ,
+  CUPTI_LINK_FLAG_PEER_ACCESS = (1 << 1), +
+  CUPTI_LINK_FLAG_SYSMEM_ACCESS = (1 << 2), +
+  CUPTI_LINK_FLAG_PEER_ATOMICS = (1 << 3), +
+  CUPTI_LINK_FLAG_SYSMEM_ATOMICS = (1 << 4) +
+ }
 Link flags. More...
enum  CUpti_OpenAccConstructKind
 The OpenAcc parent construct kind for OpenAcc activity records.
enum  CUpti_OpenAccEventKind
 The OpenAcc event kind for OpenAcc activity records. More...
enum  CUpti_PcieDeviceType {
+  CUPTI_PCIE_DEVICE_TYPE_GPU = 0, +
+  CUPTI_PCIE_DEVICE_TYPE_BRIDGE = 1 +
+ }
enum  CUpti_PcieGen {
+  CUPTI_PCIE_GEN_GEN1 = 1, +
+  CUPTI_PCIE_GEN_GEN2 = 2, +
+  CUPTI_PCIE_GEN_GEN3 = 3, +
+  CUPTI_PCIE_GEN_GEN4 = 4, +
+  CUPTI_PCIE_GEN_GEN5 = 5 +
+ }
 PCIE Generation. More...

Functions

CUptiResult cuptiActivityConfigurePCSampling (CUcontext ctx, CUpti_ActivityPCSamplingConfig *config)
 Set PC sampling configuration.
CUptiResult cuptiActivityConfigureUnifiedMemoryCounter (CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count)
 Set Unified Memory Counter configuration.
CUptiResult cuptiActivityDisable (CUpti_ActivityKind kind)
 Disable collection of a specific kind of activity record.
CUptiResult cuptiActivityDisableContext (CUcontext context, CUpti_ActivityKind kind)
 Disable collection of a specific kind of activity record for a context.
CUptiResult cuptiActivityEnable (CUpti_ActivityKind kind)
 Enable collection of a specific kind of activity record.
CUptiResult cuptiActivityEnableAndDump (CUpti_ActivityKind kind)
 Enable collection of a specific kind of activity record. For certain activity kinds it dumps existing records.
CUptiResult cuptiActivityEnableContext (CUcontext context, CUpti_ActivityKind kind)
 Enable collection of a specific kind of activity record for a context.
CUptiResult cuptiActivityEnableLatencyTimestamps (uint8_t enable)
 Controls the collection of queued and submitted timestamps for kernels.
CUptiResult cuptiActivityEnableLaunchAttributes (uint8_t enable)
 Controls the collection of launch attributes for kernels.
CUptiResult cuptiActivityFlush (CUcontext context, uint32_t streamId, uint32_t flag)
 Wait for all activity records to be delivered via the completion callback.
CUptiResult cuptiActivityFlushAll (uint32_t flag)
 Request to deliver activity records via the buffer completion callback.
CUptiResult cuptiActivityFlushPeriod (uint32_t time)
 Sets the flush period for the worker thread.
CUptiResult cuptiActivityGetAttribute (CUpti_ActivityAttribute attr, size_t *valueSize, void *value)
 Read an activity API attribute.
CUptiResult cuptiActivityGetNextRecord (uint8_t *buffer, size_t validBufferSizeBytes, CUpti_Activity **record)
 Iterate over the activity records in a buffer.
CUptiResult cuptiActivityGetNumDroppedRecords (CUcontext context, uint32_t streamId, size_t *dropped)
 Get the number of activity records that were dropped because of insufficient buffer space.
CUptiResult cuptiActivityPopExternalCorrelationId (CUpti_ExternalCorrelationKind kind, uint64_t *lastId)
 Pop an external correlation id for the calling thread.
CUptiResult cuptiActivityPushExternalCorrelationId (CUpti_ExternalCorrelationKind kind, uint64_t id)
 Push an external correlation id for the calling thread.
CUptiResult cuptiActivityRegisterCallbacks (CUpti_BuffersCallbackRequestFunc funcBufferRequested, CUpti_BuffersCallbackCompleteFunc funcBufferCompleted)
 Registers callback functions with CUPTI for activity buffer handling.
CUptiResult cuptiActivityRegisterTimestampCallback (CUpti_TimestampCallbackFunc funcTimestamp)
 Registers callback function with CUPTI for providing timestamp.
CUptiResult cuptiActivitySetAttribute (CUpti_ActivityAttribute attr, size_t *valueSize, void *value)
 Write an activity API attribute.
CUptiResult cuptiComputeCapabilitySupported (int major, int minor, int *support)
 Check support for a compute capability.
CUptiResult cuptiDeviceSupported (CUdevice dev, int *support)
 Check support for a compute device.
CUptiResult cuptiDeviceVirtualizationMode (CUdevice dev, CUpti_DeviceVirtualizationMode *mode)
 Query the virtualization mode of the device.
CUptiResult cuptiFinalize (void)
 Detach CUPTI from the running process.
CUptiResult cuptiGetAutoBoostState (CUcontext context, CUpti_ActivityAutoBoostState *state)
 Get auto boost state.
CUptiResult cuptiGetContextId (CUcontext context, uint32_t *contextId)
 Get the ID of a context.
CUptiResult cuptiGetDeviceId (CUcontext context, uint32_t *deviceId)
 Get the ID of a device.
CUptiResult cuptiGetGraphId (CUgraph graph, uint32_t *pId)
 Get the unique ID of a graph.
CUptiResult cuptiGetGraphNodeId (CUgraphNode node, uint64_t *nodeId)
 Get the unique ID of a graph node.
CUptiResult cuptiGetLastError (void)
 Returns the last error from a CUPTI call or callback.
CUptiResult cuptiGetStreamId (CUcontext context, CUstream stream, uint32_t *streamId)
 Get the ID of a stream.
CUptiResult cuptiGetStreamIdEx (CUcontext context, CUstream stream, uint8_t perThreadStream, uint32_t *streamId)
 Get the ID of a stream.
CUptiResult cuptiGetThreadIdType (CUpti_ActivityThreadIdType *type)
 Get the thread-id type.
CUptiResult cuptiGetTimestamp (uint64_t *timestamp)
 Get the CUPTI timestamp.
CUptiResult cuptiSetThreadIdType (CUpti_ActivityThreadIdType type)
 Set the thread-id type.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI Activity API.

Define Documentation

+ +
+
+ + + + +
#define CUPTI_AUTO_BOOST_INVALID_CLIENT_PID   0
+
+
+ +

+An invalid/unknown process id. +

+

+ +

+
+ + + + +
#define CUPTI_CORRELATION_ID_UNKNOWN   0
+
+
+ +

+An invalid/unknown correlation ID. A correlation ID of this value indicates that there is no correlation for the activity record. +

+

+ +

+
+ + + + +
#define CUPTI_FUNCTION_INDEX_ID_INVALID   0
+
+
+ +

+An invalid function index ID. +

+

+ +

+
+ + + + +
#define CUPTI_GRID_ID_UNKNOWN   0LL
+
+
+ +

+An invalid/unknown grid ID. +

+

+ +

+
+ + + + +
#define CUPTI_MAX_NVLINK_PORTS   32
+
+
+ +

+Maximum NVLink port numbers. +

+

+ +

+
+ + + + +
#define CUPTI_NVLINK_INVALID_PORT   -1
+
+
+ +

+Invalid/unknown NVLink port number. +

+

+ +

+
+ + + + +
#define CUPTI_SOURCE_LOCATOR_ID_UNKNOWN   0
+
+
+ +

+The source-locator ID that indicates an unknown source location. There is not an actual CUpti_ActivitySourceLocator object corresponding to this value. +

+

+ +

+
+ + + + +
#define CUPTI_SYNCHRONIZATION_INVALID_VALUE   -1
+
+
+ +

+An invalid/unknown value. +

+

+ +

+
+ + + + +
#define CUPTI_TIMESTAMP_UNKNOWN   0LL
+
+
+ +

+An invalid/unknown timestamp for a start, end, queued, submitted, or completed time. +

+

+


Typedef Documentation

+ +
+
+ + + + +
typedef void( * CUpti_BuffersCallbackCompleteFunc)(CUcontext context, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
+
+
+ +

+This callback function returns to the CUPTI client a buffer containing activity records. The buffer contains validSize bytes of activity records which should be read using cuptiActivityGetNextRecord. The number of dropped records can be read using cuptiActivityGetNumDroppedRecords. After this call CUPTI relinquishes ownership of the buffer and will not use it anymore. The client may return the buffer to CUPTI using the CUpti_BuffersCallbackRequestFunc callback. Note: From CUDA 6.0 onwards, all buffers returned by this callback are global buffers, i.e. there is no context/stream specific buffer. The user needs to parse the global buffer to extract the context/stream specific activity records.

+

Parameters:
+ + + + + + +
context The context this buffer is associated with. If NULL, the buffer is associated with the global activities. This field is deprecated as of CUDA 6.0 and will always be NULL.
streamId The stream id this buffer is associated with. This field is deprecated as of CUDA 6.0 and will always be NULL.
buffer The activity record buffer.
size The total size of the buffer in bytes as set in CUpti_BuffersCallbackRequestFunc.
validSize The number of valid bytes in the buffer.
+
+ +
+

+ +

+
+ + + + +
typedef void( * CUpti_BuffersCallbackRequestFunc)(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
+
+
+ +

+This callback function signals the CUPTI client that an activity buffer is needed by CUPTI. The activity buffer is used by CUPTI to store activity records. The callback function can decline the request by setting *buffer to NULL. In this case CUPTI may drop activity records.

+

Parameters:
+ + + + +
buffer Returns the new buffer. If set to NULL then no buffer is returned.
size Returns the size of the returned buffer.
maxNumRecords Returns the maximum number of records that should be placed in the buffer. If 0 then the buffer is filled with as many records as possible. If > 0 the buffer is filled with at most that many records before it is returned.
+
+ +
+

+ +

+
+ + + + +
typedef uint64_t( * CUpti_TimestampCallbackFunc)(void)
+
+
+ +

+This callback function signals the CUPTI client that a timestamp needs to be returned. This timestamp is treated as a normalized timestamp to be used for various purposes in CUPTI, for example to store the start and end timestamps reported in the CUPTI activity records. The returned timestamp must be in nanoseconds.

+

See also:
cuptiActivityRegisterTimestampCallback
+ +
+

+


Enumeration Type Documentation

+ +
+
+ + + + +
enum CUpti_ActivityAttribute
+
+
+ +

+These attributes are used to control the behavior of the activity API.

Enumerator:
+ + + + + + + + + + +
CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE  +The device memory size (in bytes) reserved for storing profiling data for concurrent kernels (activity kind CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL), memcopies and memsets for each buffer on a context. The value is a size_t.

+There is a limit on how many device buffers can be allocated per context. User can query and set this limit using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT. CUPTI doesn't pre-allocate all the buffers; it pre-allocates only as many buffers as set by the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE. When all of the data in a buffer is consumed, it is added to the reuse pool, and CUPTI picks a buffer from this pool when a new buffer is needed. Thus memory footprint does not scale with the kernel count. Applications with a high density of kernels, memcopies and memsets might cause CUPTI to allocate more device buffers. CUPTI allocates another buffer only when it runs out of the buffers in the reuse pool.

+Since buffer allocation happens in the main application thread, this might result in stalls in the critical path. CUPTI pre-allocates 3 buffers of the same size to mitigate this issue. User can query and set the pre-allocation limit using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE.

+Having larger buffer size leaves less device memory for the application. Having smaller buffer size increases the risk of dropping timestamps for records if too many kernels or memcopies or memsets are launched at one time.

+This value only applies to new buffer allocations. Set this value before initializing CUDA or before creating a context to ensure it is considered for the following allocations.

+The default value is 3200000 (~3MB) which can accommodate profiling data up to 100,000 kernels, memcopies and memsets combined.

+Note: Starting with the CUDA 11.2 release, CUPTI allocates profiling buffer in the pinned host memory by default as this might help in improving the performance of the tracing run. Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_MEM_ALLOCATION_TYPE_HOST_PINNED for more details. Size of the memory and maximum number of pools are still controlled by the attributes CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE and CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT.

+Note: The actual amount of device memory per buffer reserved by CUPTI might be larger.

CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP  +The device memory size (in bytes) reserved for storing profiling data for CDP operations for each buffer on a context. The value is a size_t.

+Having larger buffer size means less flush operations but consumes more device memory. This value only applies to new allocations.

+Set this value before initializing CUDA or before creating a context to ensure it is considered for the following allocations.

+The default value is 8388608 (8MB).

+Note: The actual amount of device memory per context reserved by CUPTI might be larger.

CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT  +The maximum number of device memory buffers per context. The value is a size_t.

+For an application with a high rate of kernel launches, memcopies and memsets, having a bigger pool limit helps in timestamp collection for all these activities at the expense of a larger memory footprint. Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE for more details.

+Setting this value will not modify the number of memory buffers currently stored.

+Set this value before initializing CUDA to ensure the limit is not exceeded.

+The default value is 250.

CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE  +The profiling semaphore pool size reserved for storing profiling data for serialized kernels tracing (activity kind CUPTI_ACTIVITY_KIND_KERNEL) for each context. The value is a size_t.

+There is a limit on how many semaphore pools can be allocated per context. User can query and set this limit using the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT. CUPTI doesn't pre-allocate all the semaphore pools; it pre-allocates only as many semaphore pools as set by the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE. When all of the data in a semaphore pool is consumed, it is added to the reuse pool, and CUPTI picks a semaphore pool from the reuse pool when a new semaphore pool is needed. Thus memory footprint does not scale with the kernel count. Applications with a high density of kernels might cause CUPTI to allocate more semaphore pools. CUPTI allocates another semaphore pool only when it runs out of the semaphore pools in the reuse pool.

+Since semaphore pool allocation happens in the main application thread, this might result in stalls in the critical path. CUPTI pre-allocates 3 semaphore pools of the same size to mitigate this issue. User can query and set the pre-allocation limit using the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE.

+Having larger semaphore pool size leaves less device memory for the application. Having smaller semaphore pool size increases the risk of dropping timestamps for kernel records if too many kernels are issued/launched at one time.

+This value only applies to new semaphore pool allocations. Set this value before initializing CUDA or before creating a context to ensure it is considered for the following allocations.

+The default value is 25000 which can accommodate profiling data for up to 25,000 kernels.

CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT  +The maximum number of profiling semaphore pools per context. The value is a size_t.

+For an application with high rate of kernel launches, having a bigger pool limit helps in timestamp collection for all the kernels, at the expense of a larger device memory footprint. Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE for more details.

+Set this value before initializing CUDA to ensure the limit is not exceeded.

+The default value is 250.

CUPTI_ACTIVITY_ATTR_ZEROED_OUT_ACTIVITY_BUFFER  +The flag to indicate whether user should provide activity buffer of zero value. The value is a uint8_t.

+If the value of this attribute is non-zero, user should provide a zero value buffer in the CUpti_BuffersCallbackRequestFunc. If the user does not provide a zero value buffer after setting this to non-zero, the activity buffer may contain some uninitialized values when CUPTI returns it in CUpti_BuffersCallbackCompleteFunc

+If the value of this attribute is zero, CUPTI will initialize the user buffer received in the CUpti_BuffersCallbackRequestFunc to zero before filling it. If the user sets this to zero, a few stalls may appear in critical path because CUPTI will zero out the buffer in the main thread. Set this value before returning from CUpti_BuffersCallbackRequestFunc to ensure it is considered for all the subsequent user buffers.

+The default value is 0.

CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE  +Number of device buffers to pre-allocate for a context during the initialization phase. The value is a size_t.

+Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE for details.

+This value must be less than the maximum number of device buffers set using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT

+Set this value before initializing CUDA or before creating a context to ensure it is considered by the CUPTI.

+The default value is set to 3 to ping pong between these buffers (if possible).

CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE  +Number of profiling semaphore pools to pre-allocate for a context during the initialization phase. The value is a size_t.

+Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE for details.

+This value must be less than the maximum number of profiling semaphore pools set using the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT

+Set this value before initializing CUDA or before creating a context to ensure it is considered by the CUPTI.

+The default value is set to 3 to ping pong between these pools (if possible).

CUPTI_ACTIVITY_ATTR_MEM_ALLOCATION_TYPE_HOST_PINNED  +Allocate page-locked (pinned) host memory for storing profiling data for concurrent kernels, memcopies and memsets for each buffer on a context. The value is a uint8_t.

+Starting with the CUDA 11.2 release, CUPTI allocates profiling buffer in the pinned host memory by default as this might help in improving the performance of the tracing run. Allocating excessive amounts of pinned memory may degrade system performance, since it reduces the amount of memory available to the system for paging. For this reason user might want to change the location from pinned host memory to device memory by setting value of this attribute to 0.

+The default value is 1.

+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + + +
CUPTI_ACTIVITY_COMPUTE_API_UNKNOWN  +The compute API is not known.
CUPTI_ACTIVITY_COMPUTE_API_CUDA  +The compute APIs are for CUDA.
CUPTI_ACTIVITY_COMPUTE_API_CUDA_MPS  +The compute APIs are for CUDA running in MPS (Multi-Process Service) environment.
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + + + + +
CUPTI_ACTIVITY_ENVIRONMENT_UNKNOWN  +Unknown data.
CUPTI_ACTIVITY_ENVIRONMENT_SPEED  +The environment data is related to speed.
CUPTI_ACTIVITY_ENVIRONMENT_TEMPERATURE  +The environment data is related to temperature.
CUPTI_ACTIVITY_ENVIRONMENT_POWER  +The environment data is related to power.
CUPTI_ACTIVITY_ENVIRONMENT_COOLING  +The environment data is related to cooling.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityFlag
+
+
+ +

+Activity record flags. Flags can be combined by bitwise OR to associate multiple flags with an activity record. Each flag is specific to a certain activity kind, as noted below.

Enumerator:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUPTI_ACTIVITY_FLAG_NONE  +Indicates the activity record has no flags.
CUPTI_ACTIVITY_FLAG_DEVICE_CONCURRENT_KERNELS  +Indicates the activity represents a device that supports concurrent kernel execution. Valid for CUPTI_ACTIVITY_KIND_DEVICE.
CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE  +Indicates if the activity represents a CUdevice_attribute value or a CUpti_DeviceAttribute value. Valid for CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE.
CUPTI_ACTIVITY_FLAG_MEMCPY_ASYNC  +Indicates the activity represents an asynchronous memcpy operation. Valid for CUPTI_ACTIVITY_KIND_MEMCPY.
CUPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS  +Indicates the activity represents an instantaneous marker. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_START  +Indicates the activity represents a region start marker. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_END  +Indicates the activity represents a region end marker. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE  +Indicates the activity represents an attempt to acquire a user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_SUCCESS  +Indicates the activity represents success in acquiring the user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_FAILED  +Indicates the activity represents failure in acquiring the user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_RELEASE  +Indicates the activity represents releasing a reservation on user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER.
CUPTI_ACTIVITY_FLAG_MARKER_COLOR_NONE  +Indicates the activity represents a marker that does not specify a color. Valid for CUPTI_ACTIVITY_KIND_MARKER_DATA.
CUPTI_ACTIVITY_FLAG_MARKER_COLOR_ARGB  +Indicates the activity represents a marker that specifies a color in alpha-red-green-blue format. Valid for CUPTI_ACTIVITY_KIND_MARKER_DATA.
CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_SIZE_MASK  +The number of bytes requested by each thread. Valid for CUpti_ActivityGlobalAccess3.
CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_LOAD  +If this bit in flag is set, the access was a load, else it is a store access. Valid for CUpti_ActivityGlobalAccess3.
CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_CACHED  +If this bit in flag is set, the load access was cached else it is uncached. Valid for CUpti_ActivityGlobalAccess3.
CUPTI_ACTIVITY_FLAG_METRIC_OVERFLOWED  +If this bit in flag is set, the metric value overflowed. Valid for CUpti_ActivityMetric and CUpti_ActivityMetricInstance.
CUPTI_ACTIVITY_FLAG_METRIC_VALUE_INVALID  +If this bit in flag is set, the metric value couldn't be calculated. This occurs when a value(s) required to calculate the metric is missing. Valid for CUpti_ActivityMetric and CUpti_ActivityMetricInstance.
CUPTI_ACTIVITY_FLAG_INSTRUCTION_VALUE_INVALID  +If this bit in flag is set, the source level metric value couldn't be calculated. This occurs when a value(s) required to calculate the source level metric cannot be evaluated. Valid for CUpti_ActivityInstructionExecution.
CUPTI_ACTIVITY_FLAG_INSTRUCTION_CLASS_MASK  +The mask for the instruction class, CUpti_ActivityInstructionClass. Valid for CUpti_ActivityInstructionExecution and CUpti_ActivityInstructionCorrelation.
CUPTI_ACTIVITY_FLAG_FLUSH_FORCED  +When calling cuptiActivityFlushAll, this flag can be set to force CUPTI to flush all records in the buffer, whether finished or not
CUPTI_ACTIVITY_FLAG_SHARED_ACCESS_KIND_SIZE_MASK  +The number of bytes requested by each thread. Valid for CUpti_ActivitySharedAccess.
CUPTI_ACTIVITY_FLAG_SHARED_ACCESS_KIND_LOAD  +If this bit in flag is set, the access was a load, else it is a store access. Valid for CUpti_ActivitySharedAccess.
CUPTI_ACTIVITY_FLAG_MEMSET_ASYNC  +Indicates the activity represents an asynchronous memset operation. Valid for CUPTI_ACTIVITY_KIND_MEMSET.
CUPTI_ACTIVITY_FLAG_THRASHING_IN_CPU  +Indicates the activity represents thrashing in CPU. Valid for counter of kind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING in CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER
CUPTI_ACTIVITY_FLAG_THROTTLING_IN_CPU  +Indicates the activity represents page throttling in CPU. Valid for counter of kind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING in CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER
+
+ +
+

+ +

+ +
+ +

+The SASS instructions are broadly divided into different classes. Each enum represents a classification.

Enumerator:
+ + + + + + + + + + + + + + + + + + + + + + +
CUPTI_ACTIVITY_INSTRUCTION_CLASS_UNKNOWN  +The instruction class is not known.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_32  +Represents a 32 bit floating point operation.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_64  +Represents a 64 bit floating point operation.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_INTEGER  +Represents an integer operation.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_BIT_CONVERSION  +Represents a bit conversion operation.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_CONTROL_FLOW  +Represents a control flow instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_GLOBAL  +Represents a global load-store instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SHARED  +Represents a shared load-store instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_LOCAL  +Represents a local load-store instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_GENERIC  +Represents a generic load-store instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SURFACE  +Represents a surface load-store instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_CONSTANT  +Represents a constant load instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_TEXTURE  +Represents a texture load-store instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_GLOBAL_ATOMIC  +Represents a global atomic instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SHARED_ATOMIC  +Represents a shared atomic instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SURFACE_ATOMIC  +Represents a surface atomic instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_INTER_THREAD_COMMUNICATION  +Represents an inter-thread communication instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_BARRIER  +Represents a barrier instruction.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_MISCELLANEOUS  +Represents some miscellaneous instructions which do not fit in the above classification.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_16  +Represents a 16 bit floating point operation.
CUPTI_ACTIVITY_INSTRUCTION_CLASS_UNIFORM  +Represents uniform instruction.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityJitEntryType
+
+
+ +

+To be used in CUpti_ActivityJit.

Enumerator:
+ + + +
CUPTI_ACTIVITY_JIT_ENTRY_PTX_TO_CUBIN  +PTX to CUBIN.
CUPTI_ACTIVITY_JIT_ENTRY_NVVM_IR_TO_PTX  +NVVM-IR to PTX
+
+ +
+

+ +

+ +
+ +

+To be used in CUpti_ActivityJit.

Enumerator:
+ + + + +
CUPTI_ACTIVITY_JIT_OPERATION_CACHE_LOAD  +Loaded from the compute cache.
CUPTI_ACTIVITY_JIT_OPERATION_CACHE_STORE  +Stored in the compute cache.
CUPTI_ACTIVITY_JIT_OPERATION_COMPILE  +JIT compilation.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityKind
+
+
+ +

+Each activity record kind represents information about a GPU or an activity occurring on a CPU or GPU. Each kind is associated with an activity record structure that holds the information associated with the kind.

See also:
CUpti_Activity

+CUpti_ActivityAPI

+CUpti_ActivityContext

+CUpti_ActivityDevice

+CUpti_ActivityDevice2

+CUpti_ActivityDevice3

+CUpti_ActivityDevice4

+CUpti_ActivityDeviceAttribute

+CUpti_ActivityEvent

+CUpti_ActivityEventInstance

+CUpti_ActivityKernel

+CUpti_ActivityKernel2

+CUpti_ActivityKernel3

+CUpti_ActivityKernel4

+CUpti_ActivityKernel5

+CUpti_ActivityKernel6

+CUpti_ActivityKernel7

+CUpti_ActivityKernel8

+CUpti_ActivityCdpKernel

+CUpti_ActivityPreemption

+CUpti_ActivityMemcpy

+CUpti_ActivityMemcpy3

+CUpti_ActivityMemcpy4

+CUpti_ActivityMemcpy5

+CUpti_ActivityMemcpyPtoP

+CUpti_ActivityMemcpyPtoP2

+CUpti_ActivityMemcpyPtoP3

+CUpti_ActivityMemcpyPtoP4

+CUpti_ActivityMemset

+CUpti_ActivityMemset2

+CUpti_ActivityMemset3

+CUpti_ActivityMemset4

+CUpti_ActivityMetric

+CUpti_ActivityMetricInstance

+CUpti_ActivityName

+CUpti_ActivityMarker

+CUpti_ActivityMarker2

+CUpti_ActivityMarkerData

+CUpti_ActivitySourceLocator

+CUpti_ActivityGlobalAccess

+CUpti_ActivityGlobalAccess2

+CUpti_ActivityGlobalAccess3

+CUpti_ActivityBranch

+CUpti_ActivityBranch2

+CUpti_ActivityOverhead

+CUpti_ActivityEnvironment

+CUpti_ActivityInstructionExecution

+CUpti_ActivityUnifiedMemoryCounter

+CUpti_ActivityFunction

+CUpti_ActivityModule

+CUpti_ActivitySharedAccess

+CUpti_ActivityPCSampling

+CUpti_ActivityPCSampling2

+CUpti_ActivityPCSampling3

+CUpti_ActivityPCSamplingRecordInfo

+CUpti_ActivityCudaEvent

+CUpti_ActivityStream

+CUpti_ActivitySynchronization

+CUpti_ActivityInstructionCorrelation

+CUpti_ActivityExternalCorrelation

+CUpti_ActivityUnifiedMemoryCounter2

+CUpti_ActivityOpenAccData

+CUpti_ActivityOpenAccLaunch

+CUpti_ActivityOpenAccOther

+CUpti_ActivityOpenMp

+CUpti_ActivityNvLink

+CUpti_ActivityNvLink2

+CUpti_ActivityNvLink3

+CUpti_ActivityNvLink4

+CUpti_ActivityMemory

+CUpti_ActivityPcie

+
Enumerator:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUPTI_ACTIVITY_KIND_INVALID  +The activity record is invalid.
CUPTI_ACTIVITY_KIND_MEMCPY  +A host<->host, host<->device, or device<->device memory copy. The corresponding activity record structure is CUpti_ActivityMemcpy5.
CUPTI_ACTIVITY_KIND_MEMSET  +A memory set executing on the GPU. The corresponding activity record structure is CUpti_ActivityMemset4.
CUPTI_ACTIVITY_KIND_KERNEL  +A kernel executing on the GPU. This activity kind may significantly change the overall performance characteristics of the application because all kernel executions are serialized on the GPU. Other activity kind for kernel CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL doesn't break kernel concurrency. The corresponding activity record structure is CUpti_ActivityKernel8.
CUPTI_ACTIVITY_KIND_DRIVER  +A CUDA driver API function execution. The corresponding activity record structure is CUpti_ActivityAPI.
CUPTI_ACTIVITY_KIND_RUNTIME  +A CUDA runtime API function execution. The corresponding activity record structure is CUpti_ActivityAPI.
CUPTI_ACTIVITY_KIND_EVENT  +An event value. The corresponding activity record structure is CUpti_ActivityEvent.
CUPTI_ACTIVITY_KIND_METRIC  +A metric value. The corresponding activity record structure is CUpti_ActivityMetric.
CUPTI_ACTIVITY_KIND_DEVICE  +Information about a device. The corresponding activity record structure is CUpti_ActivityDevice4.
CUPTI_ACTIVITY_KIND_CONTEXT  +Information about a context. The corresponding activity record structure is CUpti_ActivityContext.
CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL  +A kernel executing on the GPU. This activity kind doesn't break kernel concurrency. The corresponding activity record structure is CUpti_ActivityKernel8.
CUPTI_ACTIVITY_KIND_NAME  +Resource naming done via NVTX APIs for thread, device, context, etc. The corresponding activity record structure is CUpti_ActivityName.
CUPTI_ACTIVITY_KIND_MARKER  +Instantaneous, start, or end NVTX marker. The corresponding activity record structure is CUpti_ActivityMarker2.
CUPTI_ACTIVITY_KIND_MARKER_DATA  +Extended, optional, data about a marker. The corresponding activity record structure is CUpti_ActivityMarkerData.
CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR  +Source information about source level result. The corresponding activity record structure is CUpti_ActivitySourceLocator.
CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS  +Results for source-level global access. The corresponding activity record structure is CUpti_ActivityGlobalAccess3.
CUPTI_ACTIVITY_KIND_BRANCH  +Results for source-level branch. The corresponding activity record structure is CUpti_ActivityBranch2.
CUPTI_ACTIVITY_KIND_OVERHEAD  +Overhead activity records. The corresponding activity record structure is CUpti_ActivityOverhead.
CUPTI_ACTIVITY_KIND_CDP_KERNEL  +A CDP (CUDA Dynamic Parallel) kernel executing on the GPU. The corresponding activity record structure is CUpti_ActivityCdpKernel. This activity can not be directly enabled or disabled. It is enabled and disabled through concurrent kernel activity i.e. _CONCURRENT_KERNEL.
CUPTI_ACTIVITY_KIND_PREEMPTION  +Preemption activity record indicating a preemption of a CDP (CUDA Dynamic Parallel) kernel executing on the GPU. The corresponding activity record structure is CUpti_ActivityPreemption.
CUPTI_ACTIVITY_KIND_ENVIRONMENT  +Environment activity records indicating power, clock, thermal, etc. levels of the GPU. The corresponding activity record structure is CUpti_ActivityEnvironment.
CUPTI_ACTIVITY_KIND_EVENT_INSTANCE  +An event value associated with a specific event domain instance. The corresponding activity record structure is CUpti_ActivityEventInstance.
CUPTI_ACTIVITY_KIND_MEMCPY2  +A peer to peer memory copy. The corresponding activity record structure is CUpti_ActivityMemcpyPtoP4.
CUPTI_ACTIVITY_KIND_METRIC_INSTANCE  +A metric value associated with a specific metric domain instance. The corresponding activity record structure is CUpti_ActivityMetricInstance.
CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION  +Results for source-level instruction execution. The corresponding activity record structure is CUpti_ActivityInstructionExecution.
CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER  +Unified Memory counter record. The corresponding activity record structure is CUpti_ActivityUnifiedMemoryCounter2.
CUPTI_ACTIVITY_KIND_FUNCTION  +Device global/function record. The corresponding activity record structure is CUpti_ActivityFunction.
CUPTI_ACTIVITY_KIND_MODULE  +CUDA Module record. The corresponding activity record structure is CUpti_ActivityModule.
CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE  +A device attribute value. The corresponding activity record structure is CUpti_ActivityDeviceAttribute.
CUPTI_ACTIVITY_KIND_SHARED_ACCESS  +Results for source-level shared access. The corresponding activity record structure is CUpti_ActivitySharedAccess.
CUPTI_ACTIVITY_KIND_PC_SAMPLING  +Enable PC sampling for kernels. This will serialize kernels. The corresponding activity record structure is CUpti_ActivityPCSampling3.
CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO  +Summary information about PC sampling records. The corresponding activity record structure is CUpti_ActivityPCSamplingRecordInfo.
CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION  +SASS/Source line-by-line correlation record. This will generate sass/source correlation for functions that have source level analysis or pc sampling results. The records will be generated only when either of source level analysis or pc sampling activity is enabled. The corresponding activity record structure is CUpti_ActivityInstructionCorrelation.
CUPTI_ACTIVITY_KIND_OPENACC_DATA  +OpenACC data events. The corresponding activity record structure is CUpti_ActivityOpenAccData.
CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH  +OpenACC launch events. The corresponding activity record structure is CUpti_ActivityOpenAccLaunch.
CUPTI_ACTIVITY_KIND_OPENACC_OTHER  +OpenACC other events. The corresponding activity record structure is CUpti_ActivityOpenAccOther.
CUPTI_ACTIVITY_KIND_CUDA_EVENT  +Information about a CUDA event. The corresponding activity record structure is CUpti_ActivityCudaEvent.
CUPTI_ACTIVITY_KIND_STREAM  +Information about a CUDA stream. The corresponding activity record structure is CUpti_ActivityStream.
CUPTI_ACTIVITY_KIND_SYNCHRONIZATION  +Records for synchronization management. The corresponding activity record structure is CUpti_ActivitySynchronization.
CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION  +Records for correlation of different programming APIs. The corresponding activity record structure is CUpti_ActivityExternalCorrelation.
CUPTI_ACTIVITY_KIND_NVLINK  +NVLink information. The corresponding activity record structure is CUpti_ActivityNvLink4.
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT  +Instantaneous Event information. The corresponding activity record structure is CUpti_ActivityInstantaneousEvent.
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE  +Instantaneous Event information for a specific event domain instance. The corresponding activity record structure is CUpti_ActivityInstantaneousEventInstance
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC  +Instantaneous Metric information. The corresponding activity record structure is CUpti_ActivityInstantaneousMetric.
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE  +Instantaneous Metric information for a specific metric domain instance. The corresponding activity record structure is CUpti_ActivityInstantaneousMetricInstance.
CUPTI_ACTIVITY_KIND_MEMORY  +Memory activity tracking allocation and freeing of the memory. The corresponding activity record structure is CUpti_ActivityMemory.
CUPTI_ACTIVITY_KIND_PCIE  +PCI devices information used for PCI topology. The corresponding activity record structure is CUpti_ActivityPcie.
CUPTI_ACTIVITY_KIND_OPENMP  +OpenMP parallel events. The corresponding activity record structure is CUpti_ActivityOpenMp.
CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API  +A CUDA driver kernel launch occurring outside of any public API function execution. Tools can handle these like records for driver API launch functions, although the cbid field is not used here. The corresponding activity record structure is CUpti_ActivityAPI.
CUPTI_ACTIVITY_KIND_MEMORY2  +Memory activity tracking allocation and freeing of the memory. The corresponding activity record structure is CUpti_ActivityMemory3.
CUPTI_ACTIVITY_KIND_MEMORY_POOL  +Memory pool activity tracking creation, destruction and trimming of the memory pool. The corresponding activity record structure is CUpti_ActivityMemoryPool2.
CUPTI_ACTIVITY_KIND_GRAPH_TRACE  +The corresponding activity record structure is CUpti_ActivityGraphTrace.
CUPTI_ACTIVITY_KIND_JIT  +JIT operation tracking. The corresponding activity record structure is CUpti_ActivityJit.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityLaunchType
+
+
+ +

+

Enumerator:
+ + + + +
CUPTI_ACTIVITY_LAUNCH_TYPE_REGULAR  +The kernel was launched via a regular kernel call
CUPTI_ACTIVITY_LAUNCH_TYPE_COOPERATIVE_SINGLE_DEVICE  +The kernel was launched via API cudaLaunchCooperativeKernel() or cuLaunchCooperativeKernel()
CUPTI_ACTIVITY_LAUNCH_TYPE_COOPERATIVE_MULTI_DEVICE  +The kernel was launched via API cudaLaunchCooperativeKernelMultiDevice() or cuLaunchCooperativeKernelMultiDevice()
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityMemcpyKind
+
+
+ +

+Each kind represents the source and destination targets of a memory copy. Targets are host, device, and array.

Enumerator:
+ + + + + + + + + + + + +
CUPTI_ACTIVITY_MEMCPY_KIND_UNKNOWN  +The memory copy kind is not known.
CUPTI_ACTIVITY_MEMCPY_KIND_HTOD  +A host to device memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_DTOH  +A device to host memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_HTOA  +A host to device array memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_ATOH  +A device array to host memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_ATOA  +A device array to device array memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_ATOD  +A device array to device memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_DTOA  +A device to device array memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_DTOD  +A device to device memory copy on the same device.
CUPTI_ACTIVITY_MEMCPY_KIND_HTOH  +A host to host memory copy.
CUPTI_ACTIVITY_MEMCPY_KIND_PTOP  +A peer to peer memory copy across different devices.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityMemoryKind
+
+
+ +

+Each kind represents the type of the memory accessed by a memory operation/copy.

Enumerator:
+ + + + + + + + + +
CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN  +The memory kind is unknown.
CUPTI_ACTIVITY_MEMORY_KIND_PAGEABLE  +The memory is pageable.
CUPTI_ACTIVITY_MEMORY_KIND_PINNED  +The memory is pinned.
CUPTI_ACTIVITY_MEMORY_KIND_DEVICE  +The memory is on the device.
CUPTI_ACTIVITY_MEMORY_KIND_ARRAY  +The memory is an array.
CUPTI_ACTIVITY_MEMORY_KIND_MANAGED  +The memory is managed
CUPTI_ACTIVITY_MEMORY_KIND_DEVICE_STATIC  +The memory is device static
CUPTI_ACTIVITY_MEMORY_KIND_MANAGED_STATIC  +The memory is managed static
+
+ +
+

+ +

+ +
+ +

+Describes the type of memory operation, to be used with CUpti_ActivityMemory3.

Enumerator:
+ + + +
CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_ALLOCATION  +Memory is allocated.
CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_RELEASE  +Memory is released.
+
+ +
+

+ +

+ +
+ +

+Describes the type of memory pool operation, to be used with CUpti_ActivityMemoryPool2.

Enumerator:
+ + + + +
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_CREATED  +Memory pool is created.
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_DESTROYED  +Memory pool is destroyed.
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED  +Memory pool is trimmed.
+
+ +
+

+ +

+ +
+ +

+Describes the type of memory pool, to be used with CUpti_ActivityMemory3.

Enumerator:
+ + + +
CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL  +Memory pool is local to the process.
CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED  +Memory pool is imported by the process.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityObjectKind
+
+
+ +

+

See also:
CUpti_ActivityObjectKindId
+
Enumerator:
+ + + + + + + +
CUPTI_ACTIVITY_OBJECT_UNKNOWN  +The object kind is not known.
CUPTI_ACTIVITY_OBJECT_PROCESS  +A process.
CUPTI_ACTIVITY_OBJECT_THREAD  +A thread.
CUPTI_ACTIVITY_OBJECT_DEVICE  +A device.
CUPTI_ACTIVITY_OBJECT_CONTEXT  +A context.
CUPTI_ACTIVITY_OBJECT_STREAM  +A stream.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityOverheadKind
+
+
+ +

+

Enumerator:
+ + + + + + +
CUPTI_ACTIVITY_OVERHEAD_UNKNOWN  +The overhead kind is not known.
CUPTI_ACTIVITY_OVERHEAD_DRIVER_COMPILER  +Compiler(JIT) overhead.
CUPTI_ACTIVITY_OVERHEAD_CUPTI_BUFFER_FLUSH  +Activity buffer flush overhead.
CUPTI_ACTIVITY_OVERHEAD_CUPTI_INSTRUMENTATION  +CUPTI instrumentation overhead.
CUPTI_ACTIVITY_OVERHEAD_CUPTI_RESOURCE  +CUPTI resource creation and destruction overhead.
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + + + +
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_UNKNOWN  +Partitioned global cache config unknown.
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_NOT_SUPPORTED  +Partitioned global cache not supported.
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_OFF  +Partitioned global cache config off.
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_ON  +Partitioned global cache config on.
+
+ +
+

+ +

+ +
+ +

+Sampling period can be set using cuptiActivityConfigurePCSampling

Enumerator:
+ + + + + + + +
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_INVALID  +The PC sampling period is not set.
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MIN  +Minimum sampling period available on the device.
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_LOW  +Sampling period in lower range.
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MID  +Medium sampling period.
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_HIGH  +Sampling period in higher range.
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MAX  +Maximum sampling period available on the device.
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + + + + + + + + + + + + +
CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID  +Invalid reason
CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE  +No stall, instruction is selected for issue
CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH  +Warp is blocked because next instruction is not yet available, because of instruction cache miss, or because of branching effects
CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY  +Instruction is waiting on an arithmetic dependency
CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY  +Warp is blocked because it is waiting for a memory access to complete.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE  +Texture sub-system is fully utilized or has too many outstanding requests.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC  +Warp is blocked as it is waiting at __syncthreads() or at memory barrier.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY  +Warp is blocked waiting for __constant__ memory and immediate memory access to complete.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY  +Compute operation cannot be performed due to the required resources not being available.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE  +Warp is blocked because there are too many pending memory operations. In Kepler architecture it often indicates high number of memory replays.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED  +Warp was ready to issue, but some other warp issued instead.
CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER  +Miscellaneous reasons
CUPTI_ACTIVITY_PC_SAMPLING_STALL_SLEEPING  +Sleeping.
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + + +
CUPTI_ACTIVITY_PREEMPTION_KIND_UNKNOWN  +The preemption kind is not known.
CUPTI_ACTIVITY_PREEMPTION_KIND_SAVE  +Preemption to save CDP block.
CUPTI_ACTIVITY_PREEMPTION_KIND_RESTORE  +Preemption to restore CDP block.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityStreamFlag
+
+
+ +

+The types of stream to be used with CUpti_ActivityStream.

Enumerator:
+ + + + + + +
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_UNKNOWN  +Unknown data.
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_DEFAULT  +Default stream.
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_NON_BLOCKING  +Non-blocking stream.
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_NULL  +Null stream.
CUPTI_ACTIVITY_STREAM_CREATE_MASK  +Stream create Mask
+
+ +
+

+ +

+ +
+ +

+The types of synchronization to be used with CUpti_ActivitySynchronization.

Enumerator:
+ + + + + + +
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_UNKNOWN  +Unknown data.
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_EVENT_SYNCHRONIZE  +Event synchronize API.
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_WAIT_EVENT  +Stream wait event API.
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_SYNCHRONIZE  +Stream synchronize API.
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_CONTEXT_SYNCHRONIZE  +Context synchronize API.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ActivityThreadIdType
+
+
+ +

+CUPTI uses different methods to obtain the thread-id depending on the support and the underlying platform. This enum documents these methods for each type. APIs cuptiSetThreadIdType and cuptiGetThreadIdType can be used to set and get the thread-id type.

Enumerator:
+ + + +
CUPTI_ACTIVITY_THREAD_ID_TYPE_DEFAULT  +Default type. Windows uses API GetCurrentThreadId(); Linux/Mac/Android/QNX use POSIX pthread API pthread_self().
CUPTI_ACTIVITY_THREAD_ID_TYPE_SYSTEM  +This type is based on the system API available on the underlying platform, and the thread-id obtained is supposed to be unique for the process lifetime. Windows uses API GetCurrentThreadId(); Linux uses syscall SYS_gettid; Mac uses syscall SYS_thread_selfid; Android/QNX use gettid().
+
+ +
+

+ +

+ +
+ +

+This is valid for CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT

Enumerator:
+ + + + + + +
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_UNKNOWN  +The unified memory access type is not known
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_READ  +The page fault was triggered by read memory instruction
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_WRITE  +The page fault was triggered by write memory instruction
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_ATOMIC  +The page fault was triggered by atomic memory instruction
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_PREFETCH  +The page fault was triggered by memory prefetch operation
+
+ +
+

+ +

+ +
+ +

+Many activities are associated with the Unified Memory mechanism; among them are transfers from host to device and from device to host, and page faults on the host side.

Enumerator:
+ + + + + + + + + + +
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_UNKNOWN  +The unified memory counter kind is not known.
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD  +Number of bytes transferred from host to device
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH  +Number of bytes transferred from device to host
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT  +Number of CPU page faults, this is only supported on 64 bit Linux and Mac platforms
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT  +Number of GPU page faults, this is only supported on devices with compute capability 6.0 and higher and 64 bit Linux platforms
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING  +Thrashing occurs when data is frequently accessed by multiple processors and has to be constantly migrated around to achieve data locality. In this case the overhead of migration may exceed the benefits of locality. This is only supported on 64 bit Linux platforms.
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING  +Throttling is a prevention technique used by the driver to avoid further thrashing. Here, the driver doesn't service the fault for one of the contending processors for a specific period of time, so that the other processor can run at full-speed. This is only supported on 64 bit Linux platforms.
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP  +In case throttling does not help, the driver tries to pin the memory to a processor for a specific period of time. One of the contending processors will have slow access to the memory, while the other will have fast access. This is only supported on 64 bit Linux platforms.
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOD  +Number of bytes transferred from one device to another device. This is only supported on 64 bit Linux platforms.
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + + +
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_UNKNOWN  +The unified memory counter scope is not known.
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE  +Collect unified memory counter for single process on one device
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_ALL_DEVICES  +Collect unified memory counter for single process across all devices
+
+ +
+

+ +

+ +
+ +

+This is valid for CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH

Enumerator:
+ + + + + + + +
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_UNKNOWN  +The unified memory migration cause is not known
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_USER  +The unified memory migrated due to an explicit call from the user e.g. cudaMemPrefetchAsync
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_COHERENCE  +The unified memory migrated to guarantee data coherence e.g. CPU/GPU faults on Pascal+ and kernel launch on pre-Pascal GPUs
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_PREFETCH  +The unified memory was speculatively migrated by the UVM driver before being accessed by the destination processor to improve performance
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_EVICTION  +The unified memory migrated to the CPU because it was evicted to make room for another block of memory on the GPU
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_ACCESS_COUNTERS  +The unified memory migrated to another processor because of access counter notifications. Only frequently accessed pages are migrated between CPU and GPU, or between peer GPUs.
+
+ +
+

+ +

+ +
+ +

+This is valid for CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP

Enumerator:
+ + + + + + + +
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_UNKNOWN  +The cause of mapping to remote memory was unknown
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_COHERENCE  +Mapping to remote memory was added to maintain data coherence.
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_THRASHING  +Mapping to remote memory was added to prevent further thrashing
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_POLICY  +Mapping to remote memory was added to enforce the hints specified by the programmer or by performance heuristics of the UVM driver
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_OUT_OF_MEMORY  +Mapping to remote memory was added because there is no more memory available on the processor and eviction was not possible
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_EVICTION  +Mapping to remote memory was added after the memory was evicted to make room for another block of memory on the GPU
+
+ +
+

+ +

+ +
+ +

+This indicates the virtualization mode in which CUDA device is running

Enumerator:
+ + + + +
CUPTI_DEVICE_VIRTUALIZATION_MODE_NONE  +No virtualization mode is associated with the device, i.e. it's a bare-metal GPU.
CUPTI_DEVICE_VIRTUALIZATION_MODE_PASS_THROUGH  +The device is associated with the pass-through GPU. In this mode, an entire physical GPU is directly assigned to one virtual machine (VM).
CUPTI_DEVICE_VIRTUALIZATION_MODE_VIRTUAL_GPU  +The device is associated with the virtual GPU (vGPU). In this mode multiple virtual machines (VMs) have simultaneous, direct access to a single physical GPU.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_DevType
+
+
+ +

+

Enumerator:
+ + + +
CUPTI_DEV_TYPE_GPU  +The device type is GPU.
CUPTI_DEV_TYPE_NPU  +The device type is NVLink processing unit in CPU.
+
+ +
+

+ +

+ +
+ +

+The possible reasons that a clock can be throttled. There can be more than one reason that a clock is being throttled so these types can be combined by bitwise OR. These are used in the clocksThrottleReason field in the Environment Activity Record.

Enumerator:
+ + + + + + + + +
CUPTI_CLOCKS_THROTTLE_REASON_GPU_IDLE  +Nothing is running on the GPU and the clocks are dropping to idle state.
CUPTI_CLOCKS_THROTTLE_REASON_USER_DEFINED_CLOCKS  +The GPU clocks are limited by a user specified limit.
CUPTI_CLOCKS_THROTTLE_REASON_SW_POWER_CAP  +A software power scaling algorithm is reducing the clocks below requested clocks.
CUPTI_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN  +Hardware slowdown to reduce the clock by a factor of two or more is engaged. This is an indicator of one of the following: 1) Temperature is too high, 2) External power brake assertion is being triggered (e.g. by the system power supply), 3) Change in power state.
CUPTI_CLOCKS_THROTTLE_REASON_UNKNOWN  +Some unspecified factor is reducing the clocks.
CUPTI_CLOCKS_THROTTLE_REASON_UNSUPPORTED  +Throttle reason is not supported for this GPU.
CUPTI_CLOCKS_THROTTLE_REASON_NONE  +No clock throttling.
+
+ +
+

+ +

+ +
+ +

+Custom correlation kinds are reserved for usage in external tools.

+

See also:
CUpti_ActivityExternalCorrelation
+
Enumerator:
+ + + + + + + +
CUPTI_EXTERNAL_CORRELATION_KIND_UNKNOWN  +The external API is unknown to CUPTI
CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC  +The external API is OpenACC
CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0  +The external API is custom0
CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1  +The external API is custom1
CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM2  +The external API is custom2
CUPTI_EXTERNAL_CORRELATION_KIND_SIZE  +Add new kinds before this line
+
+ +
+

+ +

+
+ + + + +
enum CUpti_LinkFlag
+
+
+ +

+Describes link properties, to be used with CUpti_ActivityNvLink.

Enumerator:
+ + + + + +
CUPTI_LINK_FLAG_PEER_ACCESS  +Is peer to peer access supported by this link.
CUPTI_LINK_FLAG_SYSMEM_ACCESS  +Is system memory access supported by this link.
CUPTI_LINK_FLAG_PEER_ATOMICS  +Is peer atomic access supported by this link.
CUPTI_LINK_FLAG_SYSMEM_ATOMICS  +Is system memory atomic access supported by this link.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_OpenAccEventKind
+
+
+ +

+

See also:
CUpti_ActivityKindOpenAcc
+ +
+

+ +

+
+ + + + +
enum CUpti_PcieDeviceType
+
+
+ +

+Field to differentiate whether PCIE Activity record is of a GPU or a PCI Bridge

Enumerator:
+ + + +
CUPTI_PCIE_DEVICE_TYPE_GPU  +PCIE GPU record
CUPTI_PCIE_DEVICE_TYPE_BRIDGE  +PCIE Bridge record
+
+ +
+

+ +

+
+ + + + +
enum CUpti_PcieGen
+
+
+ +

+Enumeration of PCIE Generation for pcie activity attribute pcieGeneration

Enumerator:
+ + + + + + +
CUPTI_PCIE_GEN_GEN1  +PCIE Generation 1
CUPTI_PCIE_GEN_GEN2  +PCIE Generation 2
CUPTI_PCIE_GEN_GEN3  +PCIE Generation 3
CUPTI_PCIE_GEN_GEN4  +PCIE Generation 4
CUPTI_PCIE_GEN_GEN5  +PCIE Generation 5
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityConfigurePCSampling (CUcontext  ctx,
CUpti_ActivityPCSamplingConfig config 
)
+
+
+ +

+For Pascal and older GPU architectures this API must be called before enabling activity kind CUPTI_ACTIVITY_KIND_PC_SAMPLING. There is no such requirement for Volta and newer GPU architectures.

+For Volta and newer GPU architectures if this API is called in the middle of execution, PC sampling configuration will be updated for subsequent kernel launches.

+

Parameters:
+ + + +
ctx The context
config A pointer to CUpti_ActivityPCSamplingConfig structure containing PC sampling configuration.
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_OPERATION if this api is called while some valid event collection method is set.
CUPTI_ERROR_INVALID_PARAMETER if config is NULL or any parameter in the config structures is not a valid value
CUPTI_ERROR_NOT_SUPPORTED Indicates that the system/device does not support PC sampling
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityConfigureUnifiedMemoryCounter (CUpti_ActivityUnifiedMemoryCounterConfig config,
uint32_t  count 
)
+
+
+ +

+

Parameters:
+ + + +
config A pointer to CUpti_ActivityUnifiedMemoryCounterConfig structures containing Unified Memory counter configuration.
count Number of Unified Memory counter configuration structures
+
+
Return values:
+ + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if config is NULL or any parameter in the config structures is not a valid value
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED One potential reason is that platform (OS/arch) does not support the unified memory counters
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE Indicates that the device does not support the unified memory counters
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES Indicates that multi-GPU configuration without P2P support between any pair of devices does not support the unified memory counters
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityDisable (CUpti_ActivityKind  kind  ) 
+
+
+ +

+Disable collection of a specific kind of activity record. Multiple kinds can be disabled by calling this function multiple times. By default all activity kinds are disabled for collection.

+

Parameters:
+ + +
kind The kind of activity record to stop collecting
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_KIND if the activity kind is not supported
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityDisableContext (CUcontext  context,
CUpti_ActivityKind  kind 
)
+
+
+ +

+Disable collection of a specific kind of activity record for a context. The setting done by this API will supersede the global settings for activity records. Multiple kinds can be disabled by calling this function multiple times.

+

Parameters:
+ + + +
context The context for which activity is to be disabled
kind The kind of activity record to stop collecting
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_KIND if the activity kind is not supported
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityEnable (CUpti_ActivityKind  kind  ) 
+
+
+ +

+Enable collection of a specific kind of activity record. Multiple kinds can be enabled by calling this function multiple times. By default all activity kinds are disabled for collection.

+

Parameters:
+ + +
kind The kind of activity record to collect
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_NOT_COMPATIBLE if the activity kind cannot be enabled
CUPTI_ERROR_INVALID_KIND if the activity kind is not supported
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityEnableAndDump (CUpti_ActivityKind  kind  ) 
+
+
+ +

+In general, the behavior of this API is similar to the API cuptiActivityEnable, i.e. it enables the collection of a specific kind of activity record. Additionally, this API can help in dumping the records for activities which happened in the past, before enabling the corresponding activity kind. The API allows getting records for the current resource allocations done in CUDA: for CUPTI_ACTIVITY_KIND_DEVICE, existing device records are dumped; for CUPTI_ACTIVITY_KIND_CONTEXT, existing context records are dumped; for CUPTI_ACTIVITY_KIND_STREAM, existing stream records are dumped; for CUPTI_ACTIVITY_KIND_NVLINK, existing NVLINK records are dumped; for CUPTI_ACTIVITY_KIND_PCIE, existing PCIE records are dumped. For other activities, the behavior is similar to the API cuptiActivityEnable.

+Device records are emitted in CUPTI on CUDA driver initialization. Those records can only be retrieved by the user if CUPTI is attached before CUDA initialization. Context and stream records are emitted on context and stream creation. The use case of the API is to provide the records for CUDA resources (contexts/streams/devices) that are currently active if the user attaches CUPTI late.

+Before calling this function, the user must register buffer callbacks to get the activity records by calling cuptiActivityRegisterCallbacks. If the user does not register the buffers and calls API cuptiActivityEnableAndDump, then CUPTI will enable the activity kind but not provide any records for that activity kind.

+

Parameters:
+ + +
kind The kind of activity record to collect
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_UNKNOWN if buffer is not initialized.
CUPTI_ERROR_NOT_COMPATIBLE if the activity kind cannot be enabled
CUPTI_ERROR_INVALID_KIND if the activity kind is not supported
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityEnableContext (CUcontext  context,
CUpti_ActivityKind  kind 
)
+
+
+ +

+Enable collection of a specific kind of activity record for a context. This setting done by this API will supersede the global settings for activity records enabled by cuptiActivityEnable. Multiple kinds can be enabled by calling this function multiple times.

+

Parameters:
+ + + +
context The context for which activity is to be enabled
kind The kind of activity record to collect
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_NOT_COMPATIBLE if the activity kind cannot be enabled
CUPTI_ERROR_INVALID_KIND if the activity kind is not supported
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityEnableLatencyTimestamps (uint8_t  enable  ) 
+
+
+ +

+This API is used to control the collection of queued and submitted timestamps for kernels whose records are provided through the struct CUpti_ActivityKernel8. Default value is 0, i.e. these timestamps are not collected. This API needs to be called before initialization of CUDA and this setting should not be changed during the profiling session.

+

Parameters:
+ + +
enable is a boolean, denoting whether these timestamps should be collected
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityEnableLaunchAttributes (uint8_t  enable  ) 
+
+
+ +

+This API is used to control the collection of launch attributes for kernels whose records are provided through the struct CUpti_ActivityKernel8. Default value is 0, i.e. these attributes are not collected.

+

Parameters:
+ + +
enable is a boolean denoting whether these launch attributes should be collected
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityFlush (CUcontext  context,
uint32_t  streamId,
uint32_t  flag 
)
+
+
+ +

+This function does not return until all activity records associated with the specified context/stream are returned to the CUPTI client using the callback registered in cuptiActivityRegisterCallbacks. To ensure that all activity records are complete, the requested stream(s), if any, are synchronized.

+If context is NULL, the global activity records (i.e. those not associated with a particular stream) are flushed (in this case no streams are synchronized). If context is a valid CUcontext and streamId is 0, the buffers of all streams of this context are flushed. Otherwise, the buffers of the specified stream in this context are flushed.

+Before calling this function, the buffer handling callback api must be activated by calling cuptiActivityRegisterCallbacks.

+

Parameters:
+ + + + +
context A valid CUcontext or NULL.
streamId The stream ID.
flag The flag can be set to indicate a forced flush. See CUpti_ActivityFlag
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_OPERATION if not preceded by a successful call to cuptiActivityRegisterCallbacks
CUPTI_ERROR_UNKNOWN an internal error occurred
+
+**DEPRECATED** This method is deprecated; CONTEXT and STREAMID will be ignored. Use cuptiActivityFlushAll to flush all data. +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityFlushAll (uint32_t  flag  ) 
+
+
+ +

+This function returns the activity records associated with all contexts/streams (and the global buffers not associated with any stream) to the CUPTI client using the callback registered in cuptiActivityRegisterCallbacks.

+This is a blocking call but it doesn't issue any CUDA synchronization calls implicitly thus it's not guaranteed that all activities are completed on the underlying devices. Activity record is considered as completed if it has all the information filled up including the timestamps if any. It is the client's responsibility to issue necessary CUDA synchronization calls before calling this function if all activity records with complete information are expected to be delivered.

+Behavior of the function based on the input flag:

    +
  • For default flush, i.e. when flag is set as 0, it returns all the activity buffers which have all the activity records completed; the buffers need not be full, though. It doesn't return buffers which have one or more incomplete records. Default flush can be done at a regular interval in a separate thread.
  • For forced flush i.e. when flag CUPTI_ACTIVITY_FLAG_FLUSH_FORCED is passed to the function, it returns all the activity buffers including the ones which have one or more incomplete activity records. It's suggested for clients to do the force flush before the termination of the profiling session to allow remaining buffers to be delivered. In general, it can be done in the at-exit handler.
+

+Before calling this function, the buffer handling callback api must be activated by calling cuptiActivityRegisterCallbacks.

+

Parameters:
+ + +
flag The flag can be set to indicate a forced flush. See CUpti_ActivityFlag
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_OPERATION if not preceded by a successful call to cuptiActivityRegisterCallbacks
CUPTI_ERROR_UNKNOWN an internal error occurred
+
+
See also:
cuptiActivityFlushPeriod
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityFlushPeriod (uint32_t  time  ) 
+
+
+ +

+CUPTI creates a worker thread to minimize the perturbance for the application created threads. CUPTI offloads certain operations from the application threads to the worker thread, this includes synchronization of profiling resources between host and device, delivery of the activity buffers to the client using the callback registered in cuptiActivityRegisterCallbacks. For performance reasons, CUPTI wakes up the worker thread based on certain heuristics.

+This API is used to control the flush period of the worker thread. This setting will override the CUPTI heuristics. Setting time to zero disables the periodic flush and restores the default behavior.

+Periodic flush can return only those activity buffers which are full and have all the activity records completed.

+It's allowed to use the API cuptiActivityFlushAll to flush the data on-demand, even when client sets the periodic flush.

+

Parameters:
+ + +
time flush period in msec
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
+
+
See also:
cuptiActivityFlushAll
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityGetAttribute (CUpti_ActivityAttribute  attr,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Read an activity API attribute and return it in *value.

+

Parameters:
+ + + + +
attr The attribute to read
valueSize Size of the buffer pointed to by value; on return, holds the number of bytes written to value
value Returns the value of the attribute
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attr is not an activity attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityGetNextRecord (uint8_t *  buffer,
size_t  validBufferSizeBytes,
CUpti_Activity **  record 
)
+
+
+ +

+This is a helper function to iterate over the activity records in a buffer. A buffer of activity records is typically obtained by receiving a CUpti_BuffersCallbackCompleteFunc callback.

+An example of typical usage:

 CUpti_Activity *record = NULL;
+ CUptiResult status = CUPTI_SUCCESS;
+   do {
+      status = cuptiActivityGetNextRecord(buffer, validSize, &record);
+      if(status == CUPTI_SUCCESS) {
+           // Use record here...
+      }
+      else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED)
+          break;
+      else {
+          goto Error;
+      }
+    } while (1);
+

+

Parameters:
+ + + + +
buffer The buffer containing activity records
record Inputs the previous record returned by cuptiActivityGetNextRecord and returns the next activity record from the buffer. If input value is NULL, returns the first activity record in the buffer. Records of kind CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL may contain invalid (0) timestamps, indicating that no timing information could be collected for lack of device memory.
validBufferSizeBytes The number of valid bytes in the buffer.
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_MAX_LIMIT_REACHED if no more records in the buffer
CUPTI_ERROR_INVALID_PARAMETER if buffer is NULL.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityGetNumDroppedRecords (CUcontext  context,
uint32_t  streamId,
size_t *  dropped 
)
+
+
+ +

+Get the number of records that were dropped because of insufficient buffer space. The dropped count includes records that could not be recorded because CUPTI did not have activity buffer space available for the record (because the CUpti_BuffersCallbackRequestFunc callback did not return an empty buffer of sufficient size) and also CDP records that could not be recorded because the device-size buffer was full (size is controlled by the CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP attribute). The dropped count maintained for the queue is reset to zero when this function is called.

+

Parameters:
+ + + + +
context The context, or NULL to get dropped count from global queue
streamId The stream ID
dropped The number of records that were dropped since the last call to this function.
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if dropped is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityPopExternalCorrelationId (CUpti_ExternalCorrelationKind  kind,
uint64_t *  lastId 
)
+
+
+ +

+This function notifies CUPTI that the calling thread is leaving an external API region.

+

Parameters:
+ + + +
kind The kind of external API that activities should be correlated with.
lastId If the function returns successfully, contains the last external correlation id for this kind; can be NULL.
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER The external API kind is invalid.
CUPTI_ERROR_QUEUE_EMPTY No external id is currently associated with kind.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityPushExternalCorrelationId (CUpti_ExternalCorrelationKind  kind,
uint64_t  id 
)
+
+
+ +

+This function notifies CUPTI that the calling thread is entering an external API region. When a CUPTI activity API record is created while within an external API region and CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION is enabled, the activity API record will be preceded by a CUpti_ActivityExternalCorrelation record for each CUpti_ExternalCorrelationKind.

+

Parameters:
+ + + +
kind The kind of external API that activities should be correlated with.
id External correlation id.
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER The external API kind is invalid
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivityRegisterCallbacks (CUpti_BuffersCallbackRequestFunc  funcBufferRequested,
CUpti_BuffersCallbackCompleteFunc  funcBufferCompleted 
)
+
+
+ +

+This function registers two callback functions to be used in asynchronous buffer handling. If registered, activity record buffers are handled using asynchronous requested/completed callbacks from CUPTI.

+Registering these callbacks prevents the client from using CUPTI's blocking enqueue/dequeue functions.

+

Parameters:
+ + + +
funcBufferRequested callback which is invoked when an empty buffer is requested by CUPTI
funcBufferCompleted callback which is invoked when a buffer containing activity records is available from CUPTI
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if either funcBufferRequested or funcBufferCompleted is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiActivityRegisterTimestampCallback (CUpti_TimestampCallbackFunc  funcTimestamp  ) 
+
+
+ +

+This function registers a callback function to obtain a timestamp of the user's choice instead of using the CUPTI-provided timestamp. By default CUPTI uses different methods, based on the underlying platform, to retrieve the timestamp: Linux and Android use clock_gettime(CLOCK_REALTIME, ..); Windows uses QueryPerformanceCounter(); Mac uses mach_absolute_time(); QNX uses ClockCycles(). Timestamps retrieved using these methods are converted to nanoseconds if needed before usage.

+The registration of timestamp callback should be done before any of the CUPTI activity kinds are enabled to make sure that all the records report the timestamp using the callback function registered through cuptiActivityRegisterTimestampCallback API.

+Changing the timestamp callback function in CUPTI through cuptiActivityRegisterTimestampCallback API in the middle of the profiling session can cause records generated prior to the change to report timestamps through previous timestamp method.

+

Parameters:
+ + +
funcTimestamp callback which is invoked when a timestamp is needed by CUPTI
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if funcTimestamp is NULL
CUPTI_ERROR_NOT_INITIALIZED 
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiActivitySetAttribute (CUpti_ActivityAttribute  attr,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Write an activity API attribute.

+

Parameters:
+ + + + +
attr The attribute to write
valueSize The size, in bytes, of the value
value The attribute value to write
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attr is not an activity attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiComputeCapabilitySupported (int  major,
int  minor,
int *  support 
)
+
+
+ +

+This function is used to check the support for a device based on its compute capability. It sets the support when the compute capability is supported by the current version of CUPTI, and clears it otherwise. This version of CUPTI might not support all GPUs sharing the same compute capability. It is suggested to use API cuptiDeviceSupported which provides correct information.

+

Parameters:
+ + + + +
major The major revision number of the compute capability
minor The minor revision number of the compute capability
support Pointer to an integer to return the support status
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if support is NULL
+
+
See also:
cuptiDeviceSupported
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceSupported (CUdevice  dev,
int *  support 
)
+
+
+ +

+This function is used to check the support for a compute device. It sets the support when the device is supported by the current version of CUPTI, and clears it otherwise.

+

Parameters:
+ + + +
dev The device handle returned by CUDA Driver API cuDeviceGet
support Pointer to an integer to return the support status
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if support is NULL
CUPTI_ERROR_INVALID_DEVICE if dev is not a valid device
+
+
See also:
cuptiComputeCapabilitySupported
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceVirtualizationMode (CUdevice  dev,
CUpti_DeviceVirtualizationMode mode 
)
+
+
+ +

+This function is used to query the virtualization mode of the CUDA device.

+

Parameters:
+ + + +
dev The device handle returned by CUDA Driver API cuDeviceGet
mode Pointer to a CUpti_DeviceVirtualizationMode to return the virtualization mode
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_DEVICE if dev is not a valid device
CUPTI_ERROR_INVALID_PARAMETER if mode is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiFinalize (void   ) 
+
+
+ +

+This API detaches the CUPTI from the running process. It destroys and cleans up all the resources associated with CUPTI in the current process. After CUPTI detaches from the process, the process will keep on running with no CUPTI attached to it. For safe operation of the API, it is recommended this API is invoked from the exit callsite of any of the CUDA Driver or Runtime API. Otherwise CUPTI client needs to make sure that required CUDA synchronization and CUPTI activity buffer flush is done before calling the API. Sample code showing the usage of the API in the cupti callback handler code:

    void CUPTIAPI
+    cuptiCallbackHandler(void *userdata, CUpti_CallbackDomain domain,
+        CUpti_CallbackId cbid, void *cbdata)
+    {
+        const CUpti_CallbackData *cbInfo = (CUpti_CallbackData *)cbdata;
+
+        // Take this code path when CUPTI detach is requested
+        if (detachCupti) {
+            switch(domain)
+            {
+            case CUPTI_CB_DOMAIN_RUNTIME_API:
+            case CUPTI_CB_DOMAIN_DRIVER_API:
+                if (cbInfo->callbackSite == CUPTI_API_EXIT) {
+                    // call the CUPTI detach API
+                    cuptiFinalize();
+                }
+                break;
+            default:
+                break;
+            }
+        }
+    }
+
+
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetAutoBoostState (CUcontext  context,
CUpti_ActivityAutoBoostState state 
)
+
+
+ +

+The profiling results can be inconsistent in case auto boost is enabled. CUPTI tries to disable auto boost while profiling. It can fail to disable in cases where user does not have the permissions or CUDA_AUTO_BOOST env variable is set. The function can be used to query whether auto boost is enabled.

+

Parameters:
+ + + +
context A valid CUcontext.
state A pointer to CUpti_ActivityAutoBoostState structure which contains the current state and the id of the process that has requested the current state
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if CUcontext or state is NULL
CUPTI_ERROR_NOT_SUPPORTED Indicates that the device does not support auto boost
CUPTI_ERROR_UNKNOWN an internal error occurred
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetContextId (CUcontext  context,
uint32_t *  contextId 
)
+
+
+ +

+Get the ID of a context.

+

Parameters:
+ + + +
context The context
contextId Returns a process-unique ID for the context
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_CONTEXT The context is NULL or not valid.
CUPTI_ERROR_INVALID_PARAMETER if contextId is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetDeviceId (CUcontext  context,
uint32_t *  deviceId 
)
+
+
+ +

+If context is NULL, returns the ID of the device that contains the currently active context. If context is non-NULL, returns the ID of the device which contains that context. Operates in a similar manner to cudaGetDevice() or cuCtxGetDevice() but may be called from within callback functions.

+

Parameters:
+ + + +
context The context, or NULL to indicate the current context.
deviceId Returns the ID of the device that is current for the calling thread.
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE if unable to get device ID
CUPTI_ERROR_INVALID_PARAMETER if deviceId is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetGraphId (CUgraph  graph,
uint32_t *  pId 
)
+
+
+ +

+Returns the unique ID of CUDA graph.

+

Parameters:
+ + + +
graph The graph.
pId Returns the unique ID of the graph
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if graph is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetGraphNodeId (CUgraphNode  node,
uint64_t *  nodeId 
)
+
+
+ +

+Returns the unique ID of the CUDA graph node.

+

Parameters:
+ + + +
node The graph node.
nodeId Returns the unique ID of the node
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if node is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiGetLastError (void   ) 
+
+
+ +

+Returns the last error that has been produced by any of the cupti api calls or the callback in the same host thread and resets it to CUPTI_SUCCESS. +

+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetStreamId (CUcontext  context,
CUstream  stream,
uint32_t *  streamId 
)
+
+
+ +

+Get the ID of a stream. The stream ID is unique within a context (i.e. all streams within a context will have unique stream IDs).

+

Parameters:
+ + + + +
context If non-NULL then the stream is checked to ensure that it belongs to this context. Typically this parameter should be null.
stream The stream
streamId Returns a context-unique ID for the stream
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_STREAM if unable to get stream ID, or if context is non-NULL and stream does not belong to the context
CUPTI_ERROR_INVALID_PARAMETER if streamId is NULL
+
+**DEPRECATED** This method is deprecated as of CUDA 8.0. Use method cuptiGetStreamIdEx instead. +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetStreamIdEx (CUcontext  context,
CUstream  stream,
uint8_t  perThreadStream,
uint32_t *  streamId 
)
+
+
+ +

+Get the ID of a stream. The stream ID is unique within a context (i.e. all streams within a context will have unique stream IDs).

+

Parameters:
+ + + + + +
context If non-NULL then the stream is checked to ensure that it belongs to this context. Typically this parameter should be null.
stream The stream
perThreadStream Flag to indicate if program is compiled for per-thread streams
streamId Returns a context-unique ID for the stream
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_STREAM if unable to get stream ID, or if context is non-NULL and stream does not belong to the context
CUPTI_ERROR_INVALID_PARAMETER if streamId is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiGetThreadIdType (CUpti_ActivityThreadIdType type  ) 
+
+
+ +

+Returns the thread-id type used in CUPTI

+

Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if type is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiGetTimestamp (uint64_t *  timestamp  ) 
+
+
+ +

+Returns a timestamp normalized to correspond with the start and end timestamps reported in the CUPTI activity records. The timestamp is reported in nanoseconds.

+

Parameters:
+ + +
timestamp Returns the CUPTI timestamp
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if timestamp is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiSetThreadIdType (CUpti_ActivityThreadIdType  type  ) 
+
+
+ +

+CUPTI uses the method corresponding to set type to generate the thread-id. See enum CUpti_ActivityThreadIdType for the list of methods. Activity records having thread-id field contain the same value. Thread id type must not be changed during the profiling session to avoid thread-id value mismatch across activity records.

+

Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_SUPPORTED if type is not supported on the platform
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__CALLBACK__API.html b/doc/Cupti/group__CUPTI__CALLBACK__API.html new file mode 100644 index 0000000000000000000000000000000000000000..789aabe10f9ca9008df0144672ee08ca1e0a5d82 --- /dev/null +++ b/doc/Cupti/group__CUPTI__CALLBACK__API.html @@ -0,0 +1,740 @@ + + +Cupti: CUPTI Callback API + + + + + +
+

CUPTI Callback API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  CUpti_CallbackData
 Data passed into a runtime or driver API callback function. More...
struct  CUpti_GraphData
 CUDA graphs data passed into a resource callback function. More...
struct  CUpti_ModuleResourceData
 Module data passed into a resource callback function. More...
struct  CUpti_NvtxData
 Data passed into a NVTX callback function. More...
struct  CUpti_ResourceData
 Data passed into a resource callback function. More...
struct  CUpti_SynchronizeData
 Data passed into a synchronize callback function. More...

Typedefs

typedef void(* CUpti_CallbackFunc )(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata)
 Function type for a callback.
typedef uint32_t CUpti_CallbackId
 An ID for a driver API, runtime API, resource or synchronization callback.
+typedef CUpti_CallbackDomain * CUpti_DomainTable
 Pointer to an array of callback domains.
+typedef struct
+CUpti_Subscriber_st * 
CUpti_SubscriberHandle
 A callback subscriber.

Enumerations

enum  CUpti_ApiCallbackSite {
+  CUPTI_API_ENTER = 0, +
+  CUPTI_API_EXIT = 1 +
+ }
 Specifies the point in an API call that a callback is issued. More...
enum  CUpti_CallbackDomain {
+  CUPTI_CB_DOMAIN_INVALID = 0, +
+  CUPTI_CB_DOMAIN_DRIVER_API = 1, +
+  CUPTI_CB_DOMAIN_RUNTIME_API = 2, +
+  CUPTI_CB_DOMAIN_RESOURCE = 3, +
+  CUPTI_CB_DOMAIN_SYNCHRONIZE = 4, +
+  CUPTI_CB_DOMAIN_NVTX = 5 +
+ }
 Callback domains. More...
enum  CUpti_CallbackIdResource {
+  CUPTI_CBID_RESOURCE_INVALID = 0, +
+  CUPTI_CBID_RESOURCE_CONTEXT_CREATED = 1, +
+  CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING = 2, +
+  CUPTI_CBID_RESOURCE_STREAM_CREATED = 3, +
+  CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING = 4, +
+  CUPTI_CBID_RESOURCE_CU_INIT_FINISHED = 5, +
+  CUPTI_CBID_RESOURCE_MODULE_LOADED = 6, +
+  CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING = 7, +
+  CUPTI_CBID_RESOURCE_MODULE_PROFILED = 8, +
+  CUPTI_CBID_RESOURCE_GRAPH_CREATED = 9, +
+  CUPTI_CBID_RESOURCE_GRAPH_DESTROY_STARTING = 10, +
+  CUPTI_CBID_RESOURCE_GRAPH_CLONED = 11, +
+  CUPTI_CBID_RESOURCE_GRAPHNODE_CREATE_STARTING = 12, +
+  CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED = 13, +
+  CUPTI_CBID_RESOURCE_GRAPHNODE_DESTROY_STARTING = 14, +
+  CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_CREATED = 15, +
+  CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_DESTROY_STARTING = 16, +
+  CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATE_STARTING = 17, +
+  CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATED = 18, +
+  CUPTI_CBID_RESOURCE_GRAPHEXEC_DESTROY_STARTING = 19, +
+  CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED = 20 +
+ }
 Callback IDs for resource domain. More...
enum  CUpti_CallbackIdSync {
+  CUPTI_CBID_SYNCHRONIZE_INVALID = 0, +
+  CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED = 1, +
+  CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED = 2 +
+ }
 Callback IDs for synchronization domain. More...

Functions

CUptiResult cuptiEnableAllDomains (uint32_t enable, CUpti_SubscriberHandle subscriber)
 Enable or disable all callbacks in all domains.
CUptiResult cuptiEnableCallback (uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain, CUpti_CallbackId cbid)
 Enable or disable callbacks for a specific domain and callback ID.
CUptiResult cuptiEnableDomain (uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain)
 Enable or disable all callbacks for a specific domain.
CUptiResult cuptiGetCallbackName (CUpti_CallbackDomain domain, uint32_t cbid, const char **name)
 Get the name of a callback for a specific domain and callback ID.
CUptiResult cuptiGetCallbackState (uint32_t *enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain, CUpti_CallbackId cbid)
 Get the current enabled/disabled state of a callback for a specific domain and function ID.
CUptiResult cuptiSubscribe (CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, void *userdata)
 Initialize a callback subscriber with a callback function and user data.
CUptiResult cuptiSupportedDomains (size_t *domainCount, CUpti_DomainTable *domainTable)
 Get the available callback domains.
CUptiResult cuptiUnsubscribe (CUpti_SubscriberHandle subscriber)
 Unregister a callback subscriber.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI Callback API.

Typedef Documentation

+ +
+
+ + + + +
typedef void( * CUpti_CallbackFunc)(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata)
+
+
+ +

+Function type for a callback. The type of the data passed to the callback in cbdata depends on the domain. If domain is CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API the type of cbdata will be CUpti_CallbackData. If domain is CUPTI_CB_DOMAIN_RESOURCE the type of cbdata will be CUpti_ResourceData. If domain is CUPTI_CB_DOMAIN_SYNCHRONIZE the type of cbdata will be CUpti_SynchronizeData. If domain is CUPTI_CB_DOMAIN_NVTX the type of cbdata will be CUpti_NvtxData.

+

Parameters:
+ + + + + +
userdata User data supplied at subscription of the callback
domain The domain of the callback
cbid The ID of the callback
cbdata Data passed to the callback.
+
+ +
+

+ +

+
+ + + + +
typedef uint32_t CUpti_CallbackId
+
+
+ +

+An ID for a driver API, runtime API, resource or synchronization callback. Within a driver API callback this should be interpreted as a CUpti_driver_api_trace_cbid value (these values are defined in cupti_driver_cbid.h). Within a runtime API callback this should be interpreted as a CUpti_runtime_api_trace_cbid value (these values are defined in cupti_runtime_cbid.h). Within a resource API callback this should be interpreted as a CUpti_CallbackIdResource value. Within a synchronize API callback this should be interpreted as a CUpti_CallbackIdSync value. +

+

+


Enumeration Type Documentation

+ +
+
+ + + + +
enum CUpti_ApiCallbackSite
+
+
+ +

+Specifies the point in an API call that a callback is issued. This value is communicated to the callback function via CUpti_CallbackData::callbackSite.

Enumerator:
+ + + +
CUPTI_API_ENTER  +The callback is at the entry of the API call.
CUPTI_API_EXIT  +The callback is at the exit of the API call.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_CallbackDomain
+
+
+ +

+Callback domains. Each domain represents callback points for a group of related API functions or CUDA driver activity.

Enumerator:
+ + + + + + + +
CUPTI_CB_DOMAIN_INVALID  +Invalid domain.
CUPTI_CB_DOMAIN_DRIVER_API  +Domain containing callback points for all driver API functions.
CUPTI_CB_DOMAIN_RUNTIME_API  +Domain containing callback points for all runtime API functions.
CUPTI_CB_DOMAIN_RESOURCE  +Domain containing callback points for CUDA resource tracking.
CUPTI_CB_DOMAIN_SYNCHRONIZE  +Domain containing callback points for CUDA synchronization.
CUPTI_CB_DOMAIN_NVTX  +Domain containing callback points for NVTX API functions.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_CallbackIdResource
+
+
+ +

+Callback IDs for resource domain, CUPTI_CB_DOMAIN_RESOURCE. This value is communicated to the callback function via the cbid parameter.

Enumerator:
+ + + + + + + + + + + + + + + + + + + + + + +
CUPTI_CBID_RESOURCE_INVALID  +Invalid resource callback ID.
CUPTI_CBID_RESOURCE_CONTEXT_CREATED  +A new context has been created.
CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING  +A context is about to be destroyed.
CUPTI_CBID_RESOURCE_STREAM_CREATED  +A new stream has been created.
CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING  +A stream is about to be destroyed.
CUPTI_CBID_RESOURCE_CU_INIT_FINISHED  +The driver has finished initializing.
CUPTI_CBID_RESOURCE_MODULE_LOADED  +A module has been loaded.
CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING  +A module is about to be unloaded.
CUPTI_CBID_RESOURCE_MODULE_PROFILED  +The current module which is being profiled.
CUPTI_CBID_RESOURCE_GRAPH_CREATED  +CUDA graph has been created.
CUPTI_CBID_RESOURCE_GRAPH_DESTROY_STARTING  +CUDA graph is about to be destroyed.
CUPTI_CBID_RESOURCE_GRAPH_CLONED  +CUDA graph is cloned.
CUPTI_CBID_RESOURCE_GRAPHNODE_CREATE_STARTING  +CUDA graph node is about to be created
CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED  +CUDA graph node is created.
CUPTI_CBID_RESOURCE_GRAPHNODE_DESTROY_STARTING  +CUDA graph node is about to be destroyed.
CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_CREATED  +Dependency on a CUDA graph node is created.
CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_DESTROY_STARTING  +Dependency on a CUDA graph node is destroyed.
CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATE_STARTING  +An executable CUDA graph is about to be created.
CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATED  +An executable CUDA graph is created.
CUPTI_CBID_RESOURCE_GRAPHEXEC_DESTROY_STARTING  +An executable CUDA graph is about to be destroyed.
CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED  +CUDA graph node is cloned.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_CallbackIdSync
+
+
+ +

+Callback IDs for synchronization domain, CUPTI_CB_DOMAIN_SYNCHRONIZE. This value is communicated to the callback function via the cbid parameter.

Enumerator:
+ + + + +
CUPTI_CBID_SYNCHRONIZE_INVALID  +Invalid synchronize callback ID.
CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED  +Stream synchronization has completed for the stream.
CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED  +Context synchronization has completed for the context.
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEnableAllDomains (uint32_t  enable,
CUpti_SubscriberHandle  subscriber 
)
+
+
+ +

+Enable or disable all callbacks in all domains.

+

Note:
Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub, d, *) and cuptiEnableAllDomains(sub) are called concurrently, the results are undefined.
+
Parameters:
+ + + +
enable New enable state for all callbacks in all domains. Zero disables all callbacks, non-zero enables all callbacks.
subscriber - Handle to callback subscription
+
+
Return values:
+ + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_INVALID_PARAMETER if subscriber is invalid
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEnableCallback (uint32_t  enable,
CUpti_SubscriberHandle  subscriber,
CUpti_CallbackDomain  domain,
CUpti_CallbackId  cbid 
)
+
+
+ +

+Enable or disable callbacks for a subscriber for a specific domain and callback ID.

+

Note:
Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub, d, c) and cuptiEnableCallback(sub, d, c) are called concurrently, the results are undefined.
+
Parameters:
+ + + + + +
enable New enable state for the callback. Zero disables the callback, non-zero enables the callback.
subscriber - Handle to callback subscription
domain The domain of the callback
cbid The ID of the callback
+
+
Return values:
+ + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_INVALID_PARAMETER if subscriber, domain or cbid is invalid.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEnableDomain (uint32_t  enable,
CUpti_SubscriberHandle  subscriber,
CUpti_CallbackDomain  domain 
)
+
+
+ +

+Enable or disable all callbacks for a specific domain.

+

Note:
Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub, d, *) and cuptiEnableDomain(sub, d) are called concurrently, the results are undefined.
+
Parameters:
+ + + + +
enable New enable state for all callbacks in the domain. Zero disables all callbacks, non-zero enables all callbacks.
subscriber - Handle to callback subscription
domain The domain of the callback
+
+
Return values:
+ + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_INVALID_PARAMETER if subscriber or domain is invalid
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetCallbackName (CUpti_CallbackDomain  domain,
uint32_t  cbid,
const char **  name 
)
+
+
+ +

+Returns a pointer to the name c_string in **name.

+

Note:
Names are available only for the DRIVER and RUNTIME domains.
+
Parameters:
+ + + + +
domain The domain of the callback
cbid The ID of the callback
name Returns pointer to the name string on success, NULL otherwise
+
+
Return values:
+ + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_INVALID_PARAMETER if name is NULL, or if domain or cbid is invalid.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetCallbackState (uint32_t *  enable,
CUpti_SubscriberHandle  subscriber,
CUpti_CallbackDomain  domain,
CUpti_CallbackId  cbid 
)
+
+
+ +

+Returns non-zero in *enable if the callback for a domain and callback ID is enabled, and zero if not enabled.

+

Note:
Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub, d, c) and cuptiEnableCallback(sub, d, c) are called concurrently, the results are undefined.
+
Parameters:
+ + + + + +
enable Returns non-zero if callback enabled, zero if not enabled
subscriber Handle to the initialized subscriber
domain The domain of the callback
cbid The ID of the callback
+
+
Return values:
+ + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_INVALID_PARAMETER if enable is NULL, or if subscriber, domain or cbid is invalid.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiSubscribe (CUpti_SubscriberHandle subscriber,
CUpti_CallbackFunc  callback,
void *  userdata 
)
+
+
+ +

+Initializes a callback subscriber with a callback function and (optionally) a pointer to user data. The returned subscriber handle can be used to enable and disable the callback for specific domains and callback IDs.

Note:
Only a single subscriber can be registered at a time. To ensure that no other CUPTI client interrupts the profiling session, it's the responsibility of all the CUPTI clients to call this function before starting the profiling session. In case a profiling session is already started by another CUPTI client, this function returns the error code CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED. Note that this function returns the same error when the application is launched using NVIDIA tools like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb and cuda-memcheck.

+This function does not enable any callbacks.

+Thread-safety: this function is thread safe.

+
Parameters:
+ + + + +
subscriber Returns handle to initialized subscriber
callback The callback function
userdata A pointer to user data. This data will be passed to the callback function via the userdata parameter.
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is already a CUPTI subscriber
CUPTI_ERROR_INVALID_PARAMETER if subscriber is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiSupportedDomains (size_t *  domainCount,
CUpti_DomainTable domainTable 
)
+
+
+ +

+Returns in *domainTable an array of size *domainCount of all the available callback domains.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
domainCount Returns number of callback domains
domainTable Returns pointer to array of available callback domains
+
+
Return values:
+ + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_INVALID_PARAMETER if domainCount or domainTable are NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiUnsubscribe (CUpti_SubscriberHandle  subscriber  ) 
+
+
+ +

+Removes a callback subscriber so that no future callbacks will be issued to that subscriber.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
subscriber Handle to the initialized subscriber
+
+
Return values:
+ + + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
CUPTI_ERROR_INVALID_PARAMETER if subscriber is NULL or not initialized
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__CHECKPOINT__API.html b/doc/Cupti/group__CUPTI__CHECKPOINT__API.html new file mode 100644 index 0000000000000000000000000000000000000000..df8c818f6821f2b42ad4b1962026e083b1359338 --- /dev/null +++ b/doc/Cupti/group__CUPTI__CHECKPOINT__API.html @@ -0,0 +1,179 @@ + + +Cupti: CUPTI Checkpoint API + + + + + +
+

CUPTI Checkpoint API

+ + + + + + + + + + + + + + + + + + + +

Data Structures

struct  NV::Cupti::Checkpoint::CUpti_Checkpoint
 Configuration and handle for a CUPTI Checkpoint. More...

Enumerations

enum  NV::Cupti::Checkpoint::CUpti_CheckpointOptimizations {
+  NV::Cupti::Checkpoint::CUPTI_CHECKPOINT_OPT_NONE = 0, +
+  NV::Cupti::Checkpoint::CUPTI_CHECKPOINT_OPT_TRANSFER = 1 +
+ }
 Specifies optimization options for a checkpoint, may be OR'd together to specify multiple options. More...

Functions

CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointFree (CUpti_Checkpoint *const handle)
 Free the backing data for a checkpoint.
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointRestore (CUpti_Checkpoint *const handle)
 Restore a checkpoint to the device associated with its context.
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointSave (CUpti_Checkpoint *const handle)
 Initialize and save a checkpoint of the device state associated with the handle context.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI Checkpoint API.

Enumeration Type Documentation

+ +
+ +
+ +

+

Enumerator:
+ + + +
CUPTI_CHECKPOINT_OPT_NONE  +Default behavior.
CUPTI_CHECKPOINT_OPT_TRANSFER  +Determine which mem blocks have changed, and only restore those. This optimization is cached, which means cuptiCheckpointRestore must always be called at the same point in the application when this option is enabled, or the result may be incorrect.
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + +
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointFree (CUpti_Checkpoint *const   handle  ) 
+
+
+ +

+Frees all associated device, host memory and filesystem storage used for this context. After freeing a handle, it may be re-used as if it was new - options may be re-configured and will take effect on the next call to cuptiCheckpointSave.

+

Parameters:
+ + +
handle A pointer to a previously saved CUpti_Checkpoint object
+
+
Return values:
+ + + + +
CUPTI_SUCCESS if the handle was successfully freed
CUPTI_ERROR_INVALID_PARAMETER if the handle was already freed or appears invalid
CUPTI_ERROR_INVALID_CONTEXT if the context is no longer valid
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointRestore (CUpti_Checkpoint *const   handle  ) 
+
+
+ +

+Restores device, pinned, and allocated memory to the state when the checkpoint was saved

+

Parameters:
+ + +
handle A pointer to a previously saved CUpti_Checkpoint object
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS if the checkpoint was successfully restored
CUPTI_ERROR_NOT_INITIALIZED if the checkpoint was not previously initialized
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_INVALID_PARAMETER if the handle appears invalid
CUPTI_ERROR_UNKNOWN if the restore or optimization operation fails
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointSave (CUpti_Checkpoint *const   handle  ) 
+
+
+ +

+Uses the handle options to configure and save a checkpoint of the device state associated with the specified context.

+

Parameters:
+ + +
handle A pointer to a CUpti_Checkpoint object
+
+
Return values:
+ + + + + + + +
CUPTI_SUCCESS if a checkpoint was successfully initialized and saved
CUPTI_ERROR_INVALID_PARAMETER if handle does not appear to refer to a valid CUpti_Checkpoint
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_INVALID_DEVICE if device associated with context is not compatible with checkpoint API
CUPTI_ERROR_INVALID_OPERATION if Save is requested over an existing checkpoint, but allowOverwrite was not originally specified
CUPTI_ERROR_OUT_OF_MEMORY if as configured, not enough backing storage space to save the checkpoint
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__EVENT__API.html b/doc/Cupti/group__CUPTI__EVENT__API.html new file mode 100644 index 0000000000000000000000000000000000000000..16b589c9ed2771df39a14e125ed02644c41ae706 --- /dev/null +++ b/doc/Cupti/group__CUPTI__EVENT__API.html @@ -0,0 +1,2190 @@ + + +Cupti: CUPTI Event API + + + + + +
+

CUPTI Event API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  CUpti_EventGroupSet
 A set of event groups. More...
struct  CUpti_EventGroupSets
 A set of event group sets. More...

Defines

+#define CUPTI_EVENT_INVALID   ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
 The value that indicates the event value is invalid.
#define CUPTI_EVENT_OVERFLOW   ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
 The overflow value for a CUPTI event.

Typedefs

typedef uint32_t CUpti_EventDomainID
 ID for an event domain.
typedef void * CUpti_EventGroup
 A group of events.
typedef uint32_t CUpti_EventID
 ID for an event.
typedef void(* CUpti_KernelReplayUpdateFunc )(const char *kernelName, int numReplaysDone, void *customData)
 Function type for getting updates on kernel replay.

Enumerations

enum  CUpti_DeviceAttribute {
+  CUPTI_DEVICE_ATTR_MAX_EVENT_ID = 1, +
+  CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID = 2, +
+  CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH = 3, +
+  CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE = 4, +
+  CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5, +
+  CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS = 6, +
+  CUPTI_DEVICE_ATTR_PCIE_LINK_RATE = 7, +
+  CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH = 8, +
+  CUPTI_DEVICE_ATTR_PCIE_GEN = 9, +
+  CUPTI_DEVICE_ATTR_DEVICE_CLASS = 10, +
+  CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE = 11, +
+  CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE = 12, +
+  CUPTI_DEVICE_ATTR_MAX_L2_UNITS = 13, +
+  CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14, +
+  CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15, +
+  CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16, +
+  CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE = 17, +
+  CUPTI_DEVICE_ATTR_NVLINK_PRESENT = 18, +
+  CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW = 19, +
+  CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT = 20 +
+ }
 Device attributes. More...
enum  CUpti_DeviceAttributeDeviceClass
 Device class. More...
enum  CUpti_EventAttribute {
+  CUPTI_EVENT_ATTR_NAME = 0, +
+  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1, +
+  CUPTI_EVENT_ATTR_LONG_DESCRIPTION = 2, +
+  CUPTI_EVENT_ATTR_CATEGORY = 3, +
+  CUPTI_EVENT_ATTR_PROFILING_SCOPE = 5 +
+ }
 Event attributes. More...
enum  CUpti_EventCategory {
+  CUPTI_EVENT_CATEGORY_INSTRUCTION = 0, +
+  CUPTI_EVENT_CATEGORY_MEMORY = 1, +
+  CUPTI_EVENT_CATEGORY_CACHE = 2, +
+  CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3, +
+  CUPTI_EVENT_CATEGORY_SYSTEM = 4 +
+ }
 An event category. More...
enum  CUpti_EventCollectionMethod {
+  CUPTI_EVENT_COLLECTION_METHOD_PM = 0, +
+  CUPTI_EVENT_COLLECTION_METHOD_SM = 1, +
+  CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED = 2, +
+  CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC = 3 +
+ }
 The collection method used for an event. More...
enum  CUpti_EventCollectionMode {
+  CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS = 0, +
+  CUPTI_EVENT_COLLECTION_MODE_KERNEL = 1 +
+ }
 Event collection modes. More...
enum  CUpti_EventDomainAttribute {
+  CUPTI_EVENT_DOMAIN_ATTR_NAME = 0, +
+  CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT = 1, +
+  CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3, +
+  CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD = 4 +
+ }
 Event domain attributes. More...
enum  CUpti_EventGroupAttribute {
+  CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID = 0, +
+  CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1, +
+  CUPTI_EVENT_GROUP_ATTR_USER_DATA = 2, +
+  CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS = 3, +
+  CUPTI_EVENT_GROUP_ATTR_EVENTS = 4, +
+  CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT = 5, +
+  CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE = 6 +
+ }
 Event group attributes. More...
enum  CUpti_EventProfilingScope {
+  CUPTI_EVENT_PROFILING_SCOPE_CONTEXT = 0, +
+  CUPTI_EVENT_PROFILING_SCOPE_DEVICE = 1, +
+  CUPTI_EVENT_PROFILING_SCOPE_BOTH = 2 +
+ }
 Profiling scope for event. More...
enum  CUpti_ReadEventFlags { CUPTI_EVENT_READ_FLAG_NONE = 0 + }
 Flags for cuptiEventGroupReadEvent and cuptiEventGroupReadAllEvents. More...

Functions

CUptiResult cuptiDeviceEnumEventDomains (CUdevice device, size_t *arraySizeBytes, CUpti_EventDomainID *domainArray)
 Get the event domains for a device.
CUptiResult cuptiDeviceGetAttribute (CUdevice device, CUpti_DeviceAttribute attrib, size_t *valueSize, void *value)
 Read a device attribute.
CUptiResult cuptiDeviceGetEventDomainAttribute (CUdevice device, CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value)
 Read an event domain attribute.
CUptiResult cuptiDeviceGetNumEventDomains (CUdevice device, uint32_t *numDomains)
 Get the number of domains for a device.
CUptiResult cuptiDeviceGetTimestamp (CUcontext context, uint64_t *timestamp)
 Read a device timestamp.
CUptiResult cuptiDisableKernelReplayMode (CUcontext context)
 Disable kernel replay mode.
CUptiResult cuptiEnableKernelReplayMode (CUcontext context)
 Enable kernel replay mode.
CUptiResult cuptiEnumEventDomains (size_t *arraySizeBytes, CUpti_EventDomainID *domainArray)
 Get the event domains available on any device.
CUptiResult cuptiEventDomainEnumEvents (CUpti_EventDomainID eventDomain, size_t *arraySizeBytes, CUpti_EventID *eventArray)
 Get the events in a domain.
CUptiResult cuptiEventDomainGetAttribute (CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value)
 Read an event domain attribute.
CUptiResult cuptiEventDomainGetNumEvents (CUpti_EventDomainID eventDomain, uint32_t *numEvents)
 Get number of events in a domain.
CUptiResult cuptiEventGetAttribute (CUpti_EventID event, CUpti_EventAttribute attrib, size_t *valueSize, void *value)
 Get an event attribute.
CUptiResult cuptiEventGetIdFromName (CUdevice device, const char *eventName, CUpti_EventID *event)
 Find an event by name.
CUptiResult cuptiEventGroupAddEvent (CUpti_EventGroup eventGroup, CUpti_EventID event)
 Add an event to an event group.
CUptiResult cuptiEventGroupCreate (CUcontext context, CUpti_EventGroup *eventGroup, uint32_t flags)
 Create a new event group for a context.
CUptiResult cuptiEventGroupDestroy (CUpti_EventGroup eventGroup)
 Destroy an event group.
CUptiResult cuptiEventGroupDisable (CUpti_EventGroup eventGroup)
 Disable an event group.
CUptiResult cuptiEventGroupEnable (CUpti_EventGroup eventGroup)
 Enable an event group.
CUptiResult cuptiEventGroupGetAttribute (CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t *valueSize, void *value)
 Read an event group attribute.
CUptiResult cuptiEventGroupReadAllEvents (CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer, size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray, size_t *numEventIdsRead)
 Read the values for all the events in an event group.
CUptiResult cuptiEventGroupReadEvent (CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, CUpti_EventID event, size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer)
 Read the value for an event in an event group.
CUptiResult cuptiEventGroupRemoveAllEvents (CUpti_EventGroup eventGroup)
 Remove all events from an event group.
CUptiResult cuptiEventGroupRemoveEvent (CUpti_EventGroup eventGroup, CUpti_EventID event)
 Remove an event from an event group.
CUptiResult cuptiEventGroupResetAllEvents (CUpti_EventGroup eventGroup)
 Zero all the event counts in an event group.
CUptiResult cuptiEventGroupSetAttribute (CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t valueSize, void *value)
 Write an event group attribute.
CUptiResult cuptiEventGroupSetDisable (CUpti_EventGroupSet *eventGroupSet)
 Disable an event group set.
CUptiResult cuptiEventGroupSetEnable (CUpti_EventGroupSet *eventGroupSet)
 Enable an event group set.
CUptiResult cuptiEventGroupSetsCreate (CUcontext context, size_t eventIdArraySizeBytes, CUpti_EventID *eventIdArray, CUpti_EventGroupSets **eventGroupPasses)
 For a set of events, get the grouping that indicates the number of passes and the event groups necessary to collect the events.
CUptiResult cuptiEventGroupSetsDestroy (CUpti_EventGroupSets *eventGroupSets)
 Destroy an event group sets object.
CUptiResult cuptiGetNumEventDomains (uint32_t *numDomains)
 Get the number of event domains available on any device.
CUptiResult cuptiKernelReplaySubscribeUpdate (CUpti_KernelReplayUpdateFunc updateFunc, void *customData)
 Subscribe to kernel replay updates.
CUptiResult cuptiSetEventCollectionMode (CUcontext context, CUpti_EventCollectionMode mode)
 Set the event collection mode.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI Event API.

+

Note:
The CUPTI event APIs from the header cupti_events.h are not supported on devices with compute capability 7.5 and higher (i.e. Turing and later GPU architectures). These APIs will be deprecated in a future CUDA release. They are replaced by the Profiling API in the header cupti_profiler_target.h and the Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h, which are supported on devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures).
+

Define Documentation

+ +
+
+ + + + +
#define CUPTI_EVENT_OVERFLOW   ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+
+
+ +

+The CUPTI event value that indicates an overflow. +

+

+


Typedef Documentation

+ +
+
+ + + + +
typedef uint32_t CUpti_EventDomainID
+
+
+ +

+ID for an event domain. An event domain represents a group of related events. A device may have multiple instances of a domain, indicating that the device can simultaneously record multiple instances of each event within that domain. +

+

+ +

+
+ + + + +
typedef void* CUpti_EventGroup
+
+
+ +

+An event group is a collection of events that are managed together. All events in an event group must belong to the same domain. +

+

+ +

+
+ + + + +
typedef uint32_t CUpti_EventID
+
+
+ +

+An event represents a countable activity, action, or occurrence on the device. +

+

+ +

+
+ + + + +
typedef void( * CUpti_KernelReplayUpdateFunc)(const char *kernelName, int numReplaysDone, void *customData)
+
+
+ +

+

Parameters:
+ + + + +
kernelName The mangled kernel name
numReplaysDone Number of replays done so far
customData Pointer to any custom data passed in when subscribing
+
+ +
+

+


Enumeration Type Documentation

+ +
+
+ + + + +
enum CUpti_DeviceAttribute
+
+
+ +

+CUPTI device attributes. These attributes can be read using cuptiDeviceGetAttribute.

Enumerator:
+ + + + + + + + + + + + + + + + + + + + + +
CUPTI_DEVICE_ATTR_MAX_EVENT_ID  +Number of event IDs for a device. Value is a uint32_t.
CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID  +Number of event domain IDs for a device. Value is a uint32_t.
CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH  +Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE  +Get theoretical maximum number of instructions per cycle. Value is a uint32_t.
CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION  +Get theoretical maximum number of single precision instructions that can be executed per second. Value is a uint64_t.
CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS  +Get number of frame buffers for device. Value is a uint64_t.
CUPTI_DEVICE_ATTR_PCIE_LINK_RATE  +Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type is non-PCIE. Value is a uint64_t.
CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH  +Get PCIE link width for device. Return 0 if bus-type is non-PCIE. Value is a uint64_t.
CUPTI_DEVICE_ATTR_PCIE_GEN  +Get PCIE generation for device. Return 0 if bus-type is non-PCIE. Value is a uint64_t.
CUPTI_DEVICE_ATTR_DEVICE_CLASS  +Get the class for the device. Value is a CUpti_DeviceAttributeDeviceClass.
CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE  +Get the peak single precision flop per cycle. Value is a uint64_t.
CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE  +Get the peak double precision flop per cycle. Value is a uint64_t.
CUPTI_DEVICE_ATTR_MAX_L2_UNITS  +Get number of L2 units. Value is a uint64_t.
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED  +Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED preference. Value is a uint64_t.
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1  +Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1 preference. Value is a uint64_t.
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL  +Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL preference. Value is a uint64_t.
CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE  +Get the peak half precision flop per cycle. Value is a uint64_t.
CUPTI_DEVICE_ATTR_NVLINK_PRESENT  +Check if Nvlink is connected to device. Returns 1, if at least one Nvlink is connected to the device, returns 0 otherwise. Value is a uint32_t.
CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW  +Check if Nvlink is present between GPU and CPU. Returns Bandwidth, in Bytes/sec, if Nvlink is present, returns 0 otherwise. Value is a uint64_t.
CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT  +Check if NVSwitch is present in the underlying topology. Returns 1, if present, returns 0 otherwise. Value is a uint32_t.
+
+ +
+

+ +

+ +
+ +

+Enumeration of device classes for device attribute CUPTI_DEVICE_ATTR_DEVICE_CLASS. +

+

+ +

+
+ + + + +
enum CUpti_EventAttribute
+
+
+ +

+Event attributes. These attributes can be read using cuptiEventGetAttribute.

Enumerator:
+ + + + + + +
CUPTI_EVENT_ATTR_NAME  +Event name. Value is a null terminated const c-string.
CUPTI_EVENT_ATTR_SHORT_DESCRIPTION  +Short description of event. Value is a null terminated const c-string.
CUPTI_EVENT_ATTR_LONG_DESCRIPTION  +Long description of event. Value is a null terminated const c-string.
CUPTI_EVENT_ATTR_CATEGORY  +Category of event. Value is CUpti_EventCategory.
CUPTI_EVENT_ATTR_PROFILING_SCOPE  +Profiling scope of the events. It can be either device or context or both. Value is a CUpti_EventProfilingScope.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_EventCategory
+
+
+ +

+Each event is assigned to a category that represents the general type of the event. An event's category is accessed using cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute.

Enumerator:
+ + + + + + +
CUPTI_EVENT_CATEGORY_INSTRUCTION  +An instruction related event.
CUPTI_EVENT_CATEGORY_MEMORY  +A memory related event.
CUPTI_EVENT_CATEGORY_CACHE  +A cache related event.
CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER  +A profile-trigger event.
CUPTI_EVENT_CATEGORY_SYSTEM  +A system event.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_EventCollectionMethod
+
+
+ +

+The collection method indicates how an event is collected.

Enumerator:
+ + + + + +
CUPTI_EVENT_COLLECTION_METHOD_PM  +Event is collected using a hardware global performance monitor.
CUPTI_EVENT_COLLECTION_METHOD_SM  +Event is collected using a hardware SM performance monitor.
CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED  +Event is collected using software instrumentation.
CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC  +Event is collected using NvLink throughput counter method.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_EventCollectionMode
+
+
+ +

+The event collection mode determines the period over which the events within the enabled event groups will be collected.

Enumerator:
+ + + +
CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS  +Events are collected for the entire duration between the cuptiEventGroupEnable and cuptiEventGroupDisable calls. Event values are reset when the events are read. For CUDA toolkit v6.0 and older this was the default mode.
CUPTI_EVENT_COLLECTION_MODE_KERNEL  +Events are collected only for the durations of kernel executions that occur between the cuptiEventGroupEnable and cuptiEventGroupDisable calls. Event collection begins when a kernel execution begins, and stops when kernel execution completes. Event values are reset to zero when each kernel execution begins. If multiple kernel executions occur between the cuptiEventGroupEnable and cuptiEventGroupDisable calls then the event values must be read after each kernel launch if those events need to be associated with the specific kernel launch. Note that collection in this mode may significantly change the overall performance characteristics of the application because kernel executions that occur between the cuptiEventGroupEnable and cuptiEventGroupDisable calls are serialized on the GPU. This is the default mode from CUDA toolkit v6.5
+
+ +
+

+ +

+
+ + + + +
enum CUpti_EventDomainAttribute
+
+
+ +

+Event domain attributes. Except where noted, all the attributes can be read using either cuptiDeviceGetEventDomainAttribute or cuptiEventDomainGetAttribute.

Enumerator:
+ + + + + +
CUPTI_EVENT_DOMAIN_ATTR_NAME  +Event domain name. Value is a null terminated const c-string.
CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT  +Number of instances of the domain for which event counts will be collected. The domain may have additional instances that cannot be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT). Can be read only with cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT  +Total number of instances of the domain, including instances that cannot be profiled. Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT to get the number of instances that can be profiled. Can be read only with cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD  +Collection method used for events contained in the event domain. Value is a CUpti_EventCollectionMethod.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_EventGroupAttribute
+
+
+ +

+Event group attributes. These attributes can be read using cuptiEventGroupGetAttribute. Attributes marked [rw] can also be written using cuptiEventGroupSetAttribute.

Enumerator:
+ + + + + + + + +
CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID  +The domain to which the event group is bound. This attribute is set when the first event is added to the group. Value is a CUpti_EventDomainID.
CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES  +[rw] Profile all the instances of the domain for this eventgroup. This feature can be used to get load balancing across all instances of a domain. Value is an integer.
CUPTI_EVENT_GROUP_ATTR_USER_DATA  +[rw] Reserved for user data.
CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS  +Number of events in the group. Value is a uint32_t.
CUPTI_EVENT_GROUP_ATTR_EVENTS  +Enumerates events in the group. Value is a pointer to buffer of size sizeof(CUpti_EventID) * num_of_events in the eventgroup. num_of_events can be queried using CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS.
CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT  +Number of instances of the domain bound to this event group that will be counted. Value is a uint32_t.
CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE  +Event group scope can be set to CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT for an eventGroup, before adding any event. Sets the scope of eventgroup as CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT when the scope of the events that will be added is CUPTI_EVENT_PROFILING_SCOPE_BOTH. If profiling scope of event is either CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT then setting this attribute will not affect the default scope. It is not allowed to add events of different scopes to the same eventgroup. Value is a uint32_t.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_EventProfilingScope
+
+
+ +

+The profiling scope of an event indicates whether the event can be collected at context scope, at device scope, or at both (i.e. at either context or device scope).

Enumerator:
+ + + + +
CUPTI_EVENT_PROFILING_SCOPE_CONTEXT  +Event is collected at context scope.
CUPTI_EVENT_PROFILING_SCOPE_DEVICE  +Event is collected at device scope.
CUPTI_EVENT_PROFILING_SCOPE_BOTH  +Event can be collected at device or context scope. The scope can be set using cuptiEventGroupSetAttribute API.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ReadEventFlags
+
+
+ +

+Flags for cuptiEventGroupReadEvent and cuptiEventGroupReadAllEvents.

Enumerator:
+ + +
CUPTI_EVENT_READ_FLAG_NONE  +No flags.
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceEnumEventDomains (CUdevice  device,
size_t *  arraySizeBytes,
CUpti_EventDomainID domainArray 
)
+
+
+ +

+Returns the event domain IDs in domainArray for a device. The size of the domainArray buffer is given by *arraySizeBytes. The size of the domainArray buffer must be at least numdomains * sizeof(CUpti_EventDomainID) or else not all domains will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in domainArray.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + +
device The CUDA device
arraySizeBytes The size of domainArray in bytes, and returns the number of bytes written to domainArray
domainArray Returns the IDs of the event domains for the device
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_PARAMETER if arraySizeBytes or domainArray are NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceGetAttribute (CUdevice  device,
CUpti_DeviceAttribute  attrib,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Read a device attribute and return it in *value.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + + +
device The CUDA device
attrib The attribute to read
valueSize Size of the buffer pointed to by value, and returns the number of bytes written to value
value Returns the value of the attribute
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not a device attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceGetEventDomainAttribute (CUdevice  device,
CUpti_EventDomainID  eventDomain,
CUpti_EventDomainAttribute  attrib,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Returns an event domain attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value.

+If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + + + +
device The CUDA device
eventDomain ID of the event domain
attrib The event domain attribute to read
valueSize The size of the value buffer in bytes, and returns the number of bytes written to value
value Returns the attribute's value
+
+
Return values:
+ + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not an event domain attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceGetNumEventDomains (CUdevice  device,
uint32_t *  numDomains 
)
+
+
+ +

+Returns the number of domains in numDomains for a device.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
device The CUDA device
numDomains Returns the number of domains
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_PARAMETER if numDomains is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceGetTimestamp (CUcontext  context,
uint64_t *  timestamp 
)
+
+
+ +

+Returns the device timestamp in *timestamp. The timestamp is reported in nanoseconds and indicates the time since the device was last reset.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
context A context on the device from which to get the timestamp
timestamp Returns the device timestamp
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_INVALID_PARAMETER if timestamp is NULL
+
+**DEPRECATED** This API is deprecated as of CUDA 11.3 +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiDisableKernelReplayMode (CUcontext  context  ) 
+
+
+ +

+Set profiling mode for the context to non-replay (default) mode. Event collection mode will be set to CUPTI_EVENT_COLLECTION_MODE_KERNEL. All previously enabled event groups and event group sets will be disabled.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
context The context
+
+
Return values:
+ + +
CUPTI_SUCCESS 
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEnableKernelReplayMode (CUcontext  context  ) 
+
+
+ +

+Set profiling mode for the context to replay mode. In this mode, any number of events can be collected in one run of the kernel. The event collection mode will automatically switch to CUPTI_EVENT_COLLECTION_MODE_KERNEL. In this mode, cuptiSetEventCollectionMode will return CUPTI_ERROR_INVALID_OPERATION.

Note:
Kernels might take longer to run if many events are enabled.

+Thread-safety: this function is thread safe.

+
Parameters:
+ + +
context The context
+
+
Return values:
+ + +
CUPTI_SUCCESS 
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEnumEventDomains (size_t *  arraySizeBytes,
CUpti_EventDomainID domainArray 
)
+
+
+ +

+Returns all the event domains available on any CUDA-capable device. Event domain IDs are returned in domainArray. The size of the domainArray buffer is given by *arraySizeBytes. The size of the domainArray buffer must be at least numDomains * sizeof(CUpti_EventDomainID) or else not all domains will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in domainArray.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
arraySizeBytes The size of domainArray in bytes, and returns the number of bytes written to domainArray
domainArray Returns all the event domains
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if arraySizeBytes or domainArray are NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventDomainEnumEvents (CUpti_EventDomainID  eventDomain,
size_t *  arraySizeBytes,
CUpti_EventID eventArray 
)
+
+
+ +

+Returns the event IDs in eventArray for a domain. The size of the eventArray buffer is given by *arraySizeBytes. The size of the eventArray buffer must be at least numdomainevents * sizeof(CUpti_EventID) or else not all events will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in eventArray.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + +
eventDomain ID of the event domain
arraySizeBytes The size of eventArray in bytes, and returns the number of bytes written to eventArray
eventArray Returns the IDs of the events in the domain
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID 
CUPTI_ERROR_INVALID_PARAMETER if arraySizeBytes or eventArray are NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventDomainGetAttribute (CUpti_EventDomainID  eventDomain,
CUpti_EventDomainAttribute  attrib,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Returns an event domain attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value.

+If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + + +
eventDomain ID of the event domain
attrib The event domain attribute to read
valueSize The size of the value buffer in bytes, and returns the number of bytes written to value
value Returns the attribute's value
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not an event domain attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventDomainGetNumEvents (CUpti_EventDomainID  eventDomain,
uint32_t *  numEvents 
)
+
+
+ +

+Returns the number of events in numEvents for a domain.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
eventDomain ID of the event domain
numEvents Returns the number of events in the domain
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID 
CUPTI_ERROR_INVALID_PARAMETER if numEvents is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGetAttribute (CUpti_EventID  event,
CUpti_EventAttribute  attrib,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Returns an event attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value.

+If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + + +
event ID of the event
attrib The event attribute to read
valueSize The size of the value buffer in bytes, and returns the number of bytes written to value
value Returns the attribute's value
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_ID 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not an event attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGetIdFromName (CUdevice  device,
const char *  eventName,
CUpti_EventID *  event 
)
+
+
+ +

+Find an event by name and return the event ID in *event.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + +
device The CUDA device
eventName The name of the event to find
event Returns the ID of the found event or undefined if unable to find the event
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_EVENT_NAME if unable to find an event with name eventName. In this case *event is undefined
CUPTI_ERROR_INVALID_PARAMETER if eventName or event are NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupAddEvent (CUpti_EventGroup  eventGroup,
CUpti_EventID  event 
)
+
+
+ +

+Add an event to an event group. The event add can fail for a number of reasons:

    +
  • The event group is enabled
  • +
  • The event does not belong to the same event domain as the events that are already in the event group
  • +
  • Device limitations on the events that can belong to the same group
  • +
  • The event group is full
  • +
+
Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
eventGroup The event group
event The event to add to the group
+
+
Return values:
+ + + + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_ID 
CUPTI_ERROR_OUT_OF_MEMORY 
CUPTI_ERROR_INVALID_OPERATION if eventGroup is enabled
CUPTI_ERROR_NOT_COMPATIBLE if event belongs to a different event domain than the events already in eventGroup, or if a device limitation prevents event from being collected at the same time as the events already in eventGroup
CUPTI_ERROR_MAX_LIMIT_REACHED if eventGroup is full
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupCreate (CUcontext  context,
CUpti_EventGroup *  eventGroup,
uint32_t  flags 
)
+
+
+ +

+Creates a new event group for context and returns the new group in *eventGroup.

Note:
flags are reserved for future use and should be set to zero.

+Thread-safety: this function is thread safe.

+
Parameters:
+ + + + +
context The context for the event group
eventGroup Returns the new event group
flags Reserved - must be zero
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_OUT_OF_MEMORY 
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupDestroy (CUpti_EventGroup  eventGroup  ) 
+
+
+ +

+Destroy an eventGroup and free its resources. An event group cannot be destroyed if it is enabled.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
eventGroup The event group to destroy
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_OPERATION if the event group is enabled
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupDisable (CUpti_EventGroup  eventGroup  ) 
+
+
+ +

+Disable an event group. Disabling an event group stops collection of events contained in the group.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
eventGroup The event group
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupEnable (CUpti_EventGroup  eventGroup  ) 
+
+
+ +

+Enable an event group. Enabling an event group zeros the value of all the events in the group and then starts collection of those events.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
eventGroup The event group
+
+
Return values:
+ + + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_NOT_READY if eventGroup does not contain any events
CUPTI_ERROR_NOT_COMPATIBLE if eventGroup cannot be enabled due to other already enabled event groups
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
CUPTI_ERROR_HARDWARE_BUSY if another client is profiling and hardware is busy
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupGetAttribute (CUpti_EventGroup  eventGroup,
CUpti_EventGroupAttribute  attrib,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Read an event group attribute and return it in *value.

Note:
Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.).
+
Parameters:
+ + + + + +
eventGroup The event group
attrib The attribute to read
valueSize Size of the buffer pointed to by value, and returns the number of bytes written to value
value Returns the value of the attribute
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not an eventgroup attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupReadAllEvents (CUpti_EventGroup  eventGroup,
CUpti_ReadEventFlags  flags,
size_t *  eventValueBufferSizeBytes,
uint64_t *  eventValueBuffer,
size_t *  eventIdArraySizeBytes,
CUpti_EventID eventIdArray,
size_t *  numEventIdsRead 
)
+
+
+ +

+Read the values for all the events in an event group. The event values are returned in the eventValueBuffer buffer. eventValueBufferSizeBytes indicates the size of eventValueBuffer. The buffer must be at least (sizeof(uint64) * number of events in group) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on the group containing the events. The buffer must be at least (sizeof(uint64) * number of domain instances * number of events in group) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the group.

+The data format returned in eventValueBuffer is:

    +
  • domain instance 0: event0 event1 ... eventN
  • domain instance 1: event0 event1 ... eventN
  • ...
  • domain instance M: event0 event1 ... eventN
+

+The event order in eventValueBuffer is returned in eventIdArray. The size of eventIdArray is specified in eventIdArraySizeBytes. The size should be at least (sizeof(CUpti_EventID) * number of events in group).

+If any instance of any event counter overflows, the value returned for that event instance will be CUPTI_EVENT_OVERFLOW.

+The only allowed value for flags is CUPTI_EVENT_READ_FLAG_NONE.

+Reading events from a disabled event group is not allowed. After being read, an event's value is reset to zero.

Note:
Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.). If cuptiEventGroupResetAllEvents is called simultaneously with this function, then returned event values are undefined.
+
Parameters:
+ + + + + + + + +
eventGroup The event group
flags Flags controlling the reading mode
eventValueBufferSizeBytes The size of eventValueBuffer in bytes, and returns the number of bytes written to eventValueBuffer
eventValueBuffer Returns the event values
eventIdArraySizeBytes The size of eventIdArray in bytes, and returns the number of bytes written to eventIdArray
eventIdArray Returns the IDs of the events in the same order as the values returned in eventValueBuffer.
numEventIdsRead Returns the number of event IDs returned in eventIdArray
+
+
Return values:
+ + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_INVALID_OPERATION if eventGroup is disabled
CUPTI_ERROR_INVALID_PARAMETER if eventGroup, eventValueBufferSizeBytes, eventValueBuffer, eventIdArraySizeBytes, eventIdArray or numEventIdsRead is NULL
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of eventValueBuffer or eventIdArray is not sufficient
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupReadEvent (CUpti_EventGroup  eventGroup,
CUpti_ReadEventFlags  flags,
CUpti_EventID  event,
size_t *  eventValueBufferSizeBytes,
uint64_t *  eventValueBuffer 
)
+
+
+ +

+Read the value for an event in an event group. The event value is returned in the eventValueBuffer buffer. eventValueBufferSizeBytes indicates the size of the eventValueBuffer buffer. The buffer must be at least sizeof(uint64) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on the group containing the event. The buffer must be at least (sizeof(uint64) * number of domain instances) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the group.

+If any instance of an event counter overflows, the value returned for that event instance will be CUPTI_EVENT_OVERFLOW.

+The only allowed value for flags is CUPTI_EVENT_READ_FLAG_NONE.

+Reading an event from a disabled event group is not allowed. After being read, an event's value is reset to zero.

Note:
Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.). If cuptiEventGroupResetAllEvents is called simultaneously with this function, then returned event values are undefined.
+
Parameters:
+ + + + + + +
eventGroup The event group
flags Flags controlling the reading mode
event The event to read
eventValueBufferSizeBytes The size of eventValueBuffer in bytes, and returns the number of bytes written to eventValueBuffer
eventValueBuffer Returns the event value(s)
+
+
Return values:
+ + + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_ID 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_INVALID_OPERATION if eventGroup is disabled
CUPTI_ERROR_INVALID_PARAMETER if eventGroup, eventValueBufferSizeBytes or eventValueBuffer is NULL
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of eventValueBuffer is not sufficient
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupRemoveAllEvents (CUpti_EventGroup  eventGroup  ) 
+
+
+ +

+Remove all events from an event group. Events cannot be removed if the event group is enabled.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
eventGroup The event group
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_OPERATION if eventGroup is enabled
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupRemoveEvent (CUpti_EventGroup  eventGroup,
CUpti_EventID  event 
)
+
+
+ +

+Remove an event from an event group. The event cannot be removed if the event group is enabled.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
eventGroup The event group
event The event to remove from the group
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_EVENT_ID 
CUPTI_ERROR_INVALID_OPERATION if eventGroup is enabled
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupResetAllEvents (CUpti_EventGroup  eventGroup  ) 
+
+
+ +

+Zero all the event counts in an event group.

Note:
Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.).
+
Parameters:
+ + +
eventGroup The event group
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_INVALID_PARAMETER if eventGroup is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupSetAttribute (CUpti_EventGroup  eventGroup,
CUpti_EventGroupAttribute  attrib,
size_t  valueSize,
void *  value 
)
+
+
+ +

+Write an event group attribute.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + + + +
eventGroup The event group
attrib The attribute to write
valueSize The size, in bytes, of the value
value The attribute value to write
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not an event group attribute, or if attrib is not a writable attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupSetDisable (CUpti_EventGroupSet eventGroupSet  ) 
+
+
+ +

+Disable a set of event groups. Disabling a set of event groups stops collection of events contained in the groups.

Note:
Thread-safety: this function is thread safe.

+If this call fails, some of the event groups in the set may be disabled and other event groups may remain enabled.

+
Parameters:
+ + +
eventGroupSet The pointer to the event group set
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_INVALID_PARAMETER if eventGroupSet is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupSetEnable (CUpti_EventGroupSet eventGroupSet  ) 
+
+
+ +

+Enable a set of event groups. Enabling a set of event groups zeros the value of all the events in all the groups and then starts collection of those events.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
eventGroupSet The pointer to the event group set
+
+
Return values:
+ + + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_HARDWARE 
CUPTI_ERROR_NOT_READY if an event group in eventGroupSet does not contain any events
CUPTI_ERROR_NOT_COMPATIBLE if an event group in eventGroupSet cannot be enabled due to other already enabled event groups
CUPTI_ERROR_INVALID_PARAMETER if eventGroupSet is NULL
CUPTI_ERROR_HARDWARE_BUSY if another client is profiling and hardware is busy
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEventGroupSetsCreate (CUcontext  context,
size_t  eventIdArraySizeBytes,
CUpti_EventID eventIdArray,
CUpti_EventGroupSets **  eventGroupPasses 
)
+
+
+ +

+The number of events that can be collected simultaneously varies by device and by the type of the events. When events can be collected simultaneously, they may need to be grouped into multiple event groups because they are from different event domains. This function takes a set of events and determines how many passes are required to collect all those events, and which events can be collected simultaneously in each pass.

+The CUpti_EventGroupSets returned in eventGroupPasses indicates how many passes are required to collect the events with the numSets field. Within each event group set, the sets array indicates the event groups that should be collected on each pass.

Note:
Thread-safety: this function is thread safe, but client must guard against another thread simultaneously destroying context.
+
Parameters:
+ + + + + +
context The context for event collection
eventIdArraySizeBytes Size of eventIdArray in bytes
eventIdArray Array of event IDs that need to be grouped
eventGroupPasses Returns a CUpti_EventGroupSets object that indicates the number of passes required to collect the events and the events to collect on each pass
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_INVALID_EVENT_ID 
CUPTI_ERROR_INVALID_PARAMETER if eventIdArray or eventGroupPasses is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiEventGroupSetsDestroy (CUpti_EventGroupSets eventGroupSets  ) 
+
+
+ +

+Destroy a CUpti_EventGroupSets object.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
eventGroupSets The object to destroy
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_OPERATION if any of the event groups contained in the sets is enabled
CUPTI_ERROR_INVALID_PARAMETER if eventGroupSets is NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiGetNumEventDomains (uint32_t *  numDomains  ) 
+
+
+ +

+Returns the total number of event domains available on any CUDA-capable device.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + +
numDomains Returns the number of domains
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if numDomains is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiKernelReplaySubscribeUpdate (CUpti_KernelReplayUpdateFunc  updateFunc,
void *  customData 
)
+
+
+ +

+When subscribed, the function pointer passed in will be called each time a kernel run is finished during kernel replay. Any previously subscribed function pointer will be replaced. Passing NULL as the function pointer unsubscribes the update.

+

Parameters:
+ + + +
updateFunc The update function pointer
customData Pointer to any custom data
+
+
Return values:
+ + +
CUPTI_SUCCESS 
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiSetEventCollectionMode (CUcontext  context,
CUpti_EventCollectionMode  mode 
)
+
+
+ +

+Set the event collection mode for a context. The mode controls the event collection behavior of all events in event groups created in the context. This API is invalid in kernel replay mode.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
context The context
mode The event collection mode
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_INVALID_OPERATION if called when replay mode is enabled
CUPTI_ERROR_NOT_SUPPORTED if mode is not supported on the device
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__METRIC__API.html b/doc/Cupti/group__CUPTI__METRIC__API.html new file mode 100644 index 0000000000000000000000000000000000000000..b8bf3468c4b8ae4e69851250de4ff4e5695f6322 --- /dev/null +++ b/doc/Cupti/group__CUPTI__METRIC__API.html @@ -0,0 +1,1090 @@ + + +Cupti: CUPTI Metric API + + + + + +
+

CUPTI Metric API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

union  CUpti_MetricValue
 A metric value. More...

Typedefs

typedef uint32_t CUpti_MetricID
 ID for a metric.

Enumerations

enum  CUpti_MetricAttribute {
+  CUPTI_METRIC_ATTR_NAME = 0, +
+  CUPTI_METRIC_ATTR_SHORT_DESCRIPTION = 1, +
+  CUPTI_METRIC_ATTR_LONG_DESCRIPTION = 2, +
+  CUPTI_METRIC_ATTR_CATEGORY = 3, +
+  CUPTI_METRIC_ATTR_VALUE_KIND = 4, +
+  CUPTI_METRIC_ATTR_EVALUATION_MODE = 5 +
+ }
 Metric attributes. More...
enum  CUpti_MetricCategory {
+  CUPTI_METRIC_CATEGORY_MEMORY = 0, +
+  CUPTI_METRIC_CATEGORY_INSTRUCTION = 1, +
+  CUPTI_METRIC_CATEGORY_MULTIPROCESSOR = 2, +
+  CUPTI_METRIC_CATEGORY_CACHE = 3, +
+  CUPTI_METRIC_CATEGORY_TEXTURE = 4, +
+  CUPTI_METRIC_CATEGORY_NVLINK = 5, +
+  CUPTI_METRIC_CATEGORY_PCIE = 6 +
+ }
 A metric category. More...
enum  CUpti_MetricEvaluationMode {
+  CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE = 1, +
+  CUPTI_METRIC_EVALUATION_MODE_AGGREGATE = 1 << 1 +
+ }
 A metric evaluation mode. More...
enum  CUpti_MetricPropertyDeviceClass
 Device class. More...
enum  CUpti_MetricPropertyID
 Metric device properties. More...
enum  CUpti_MetricValueKind {
+  CUPTI_METRIC_VALUE_KIND_DOUBLE = 0, +
+  CUPTI_METRIC_VALUE_KIND_UINT64 = 1, +
+  CUPTI_METRIC_VALUE_KIND_PERCENT = 2, +
+  CUPTI_METRIC_VALUE_KIND_THROUGHPUT = 3, +
+  CUPTI_METRIC_VALUE_KIND_INT64 = 4, +
+  CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL = 5 +
+ }
 Kinds of metric values. More...
enum  CUpti_MetricValueUtilizationLevel
 Enumeration of utilization levels for metrics values of kind CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL. Utilization values can vary from IDLE (0) to MAX (10) but the enumeration only provides specific names for a few values.

Functions

CUptiResult cuptiDeviceEnumMetrics (CUdevice device, size_t *arraySizeBytes, CUpti_MetricID *metricArray)
 Get the metrics for a device.
CUptiResult cuptiDeviceGetNumMetrics (CUdevice device, uint32_t *numMetrics)
 Get the number of metrics for a device.
CUptiResult cuptiEnumMetrics (size_t *arraySizeBytes, CUpti_MetricID *metricArray)
 Get all the metrics available on any device.
CUptiResult cuptiGetNumMetrics (uint32_t *numMetrics)
 Get the total number of metrics available on any device.
CUptiResult cuptiMetricCreateEventGroupSets (CUcontext context, size_t metricIdArraySizeBytes, CUpti_MetricID *metricIdArray, CUpti_EventGroupSets **eventGroupPasses)
 For a set of metrics, get the grouping that indicates the number of passes and the event groups necessary to collect the events required for those metrics.
CUptiResult cuptiMetricEnumEvents (CUpti_MetricID metric, size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray)
 Get the events required to calculate a metric.
CUptiResult cuptiMetricEnumProperties (CUpti_MetricID metric, size_t *propIdArraySizeBytes, CUpti_MetricPropertyID *propIdArray)
 Get the properties required to calculate a metric.
CUptiResult cuptiMetricGetAttribute (CUpti_MetricID metric, CUpti_MetricAttribute attrib, size_t *valueSize, void *value)
 Get a metric attribute.
CUptiResult cuptiMetricGetIdFromName (CUdevice device, const char *metricName, CUpti_MetricID *metric)
 Find a metric by name.
CUptiResult cuptiMetricGetNumEvents (CUpti_MetricID metric, uint32_t *numEvents)
 Get number of events required to calculate a metric.
CUptiResult cuptiMetricGetNumProperties (CUpti_MetricID metric, uint32_t *numProp)
 Get number of properties required to calculate a metric.
CUptiResult cuptiMetricGetRequiredEventGroupSets (CUcontext context, CUpti_MetricID metric, CUpti_EventGroupSets **eventGroupSets)
 For a metric get the groups of events that must be collected in the same pass.
CUptiResult cuptiMetricGetValue (CUdevice device, CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, uint64_t *eventValueArray, uint64_t timeDuration, CUpti_MetricValue *metricValue)
 Calculate the value for a metric.
CUptiResult cuptiMetricGetValue2 (CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, uint64_t *eventValueArray, size_t propIdArraySizeBytes, CUpti_MetricPropertyID *propIdArray, size_t propValueArraySizeBytes, uint64_t *propValueArray, CUpti_MetricValue *metricValue)
 Calculate the value for a metric.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI Metric API.

+

Note:
CUPTI metric APIs from the header cupti_metrics.h are not supported on devices with compute capability 7.5 and higher (i.e. Turing and later GPU architectures). These APIs will be deprecated in a future CUDA release. They are replaced by the Profiling API in the header cupti_profiler_target.h and the Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h, which are supported on devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures).
+

Typedef Documentation

+ +
+
+ + + + +
typedef uint32_t CUpti_MetricID
+
+
+ +

+A metric provides a measure of some aspect of the device. +

+

+


Enumeration Type Documentation

+ +
+
+ + + + +
enum CUpti_MetricAttribute
+
+
+ +

+Metric attributes describe properties of a metric. These attributes can be read using cuptiMetricGetAttribute.

Enumerator:
+ + + + + + + +
CUPTI_METRIC_ATTR_NAME  +Metric name. Value is a null terminated const c-string.
CUPTI_METRIC_ATTR_SHORT_DESCRIPTION  +Short description of metric. Value is a null terminated const c-string.
CUPTI_METRIC_ATTR_LONG_DESCRIPTION  +Long description of metric. Value is a null terminated const c-string.
CUPTI_METRIC_ATTR_CATEGORY  +Category of the metric. Value is of type CUpti_MetricCategory.
CUPTI_METRIC_ATTR_VALUE_KIND  +Value type of the metric. Value is of type CUpti_MetricValueKind.
CUPTI_METRIC_ATTR_EVALUATION_MODE  +Metric evaluation mode. Value is of type CUpti_MetricEvaluationMode.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_MetricCategory
+
+
+ +

+Each metric is assigned to a category that represents the general type of the metric. A metric's category is accessed using cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_CATEGORY attribute.

Enumerator:
+ + + + + + + + +
CUPTI_METRIC_CATEGORY_MEMORY  +A memory related metric.
CUPTI_METRIC_CATEGORY_INSTRUCTION  +An instruction related metric.
CUPTI_METRIC_CATEGORY_MULTIPROCESSOR  +A multiprocessor related metric.
CUPTI_METRIC_CATEGORY_CACHE  +A cache related metric.
CUPTI_METRIC_CATEGORY_TEXTURE  +A texture related metric.
CUPTI_METRIC_CATEGORY_NVLINK  +An NVLink related metric.
CUPTI_METRIC_CATEGORY_PCIE  +A PCIe related metric.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_MetricEvaluationMode
+
+
+ +

+A metric can be evaluated per hardware instance to know the load balancing across instances of a domain or the metric can be evaluated in aggregate mode when the events involved in metric evaluation are from different event domains. It might be possible to evaluate some metrics in both modes for convenience. A metric's evaluation mode is accessed using cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_EVALUATION_MODE attribute.

Enumerator:
+ + + +
CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE  +If this bit is set, the metric can be profiled for each instance of the domain. The event values passed to cuptiMetricGetValue can contain values for one instance of the domain. And cuptiMetricGetValue can be called for each instance.
CUPTI_METRIC_EVALUATION_MODE_AGGREGATE  +If this bit is set, the metric can be profiled over all instances. The event values passed to cuptiMetricGetValue can be aggregated values of events for all instances of the domain.
+
+ +
+

+ +

+ +
+ +

+Enumeration of device classes for metric property CUPTI_METRIC_PROPERTY_DEVICE_CLASS. +

+

+ +

+
+ + + + +
enum CUpti_MetricPropertyID
+
+
+ +

+Metric device properties describe device properties which are needed for a metric. Some of these properties can be collected using cuDeviceGetAttribute. +

+

+ +

+
+ + + + +
enum CUpti_MetricValueKind
+
+
+ +

+Metric values can be one of several different kinds. Corresponding to each kind is a member of the CUpti_MetricValue union. The metric value returned by cuptiMetricGetValue should be accessed using the appropriate member of that union based on its value kind.

Enumerator:
+ + + + + + + +
CUPTI_METRIC_VALUE_KIND_DOUBLE  +The metric value is a 64-bit double.
CUPTI_METRIC_VALUE_KIND_UINT64  +The metric value is a 64-bit unsigned integer.
CUPTI_METRIC_VALUE_KIND_PERCENT  +The metric value is a percentage represented by a 64-bit double. For example, 57.5% is represented by the value 57.5.
CUPTI_METRIC_VALUE_KIND_THROUGHPUT  +The metric value is a throughput represented by a 64-bit integer. The unit for throughput values is bytes/second.
CUPTI_METRIC_VALUE_KIND_INT64  +The metric value is a 64-bit signed integer.
CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL  +The metric value is a utilization level, as represented by CUpti_MetricValueUtilizationLevel.
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceEnumMetrics (CUdevice  device,
size_t *  arraySizeBytes,
CUpti_MetricID *  metricArray 
)
+
+
+ +

+Returns the metric IDs in metricArray for a device. The size of the metricArray buffer is given by *arraySizeBytes. The size of the metricArray buffer must be at least numMetrics * sizeof(CUpti_MetricID) or else not all metric IDs will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in metricArray.

+

Parameters:
+ + + + +
device The CUDA device
arraySizeBytes The size of metricArray in bytes, and returns the number of bytes written to metricArray
metricArray Returns the IDs of the metrics for the device
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_PARAMETER if arraySizeBytes or metricArray are NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiDeviceGetNumMetrics (CUdevice  device,
uint32_t *  numMetrics 
)
+
+
+ +

+Returns the number of metrics available for a device.

+

Parameters:
+ + + +
device The CUDA device
numMetrics Returns the number of metrics available for the device
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_PARAMETER if numMetrics is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiEnumMetrics (size_t *  arraySizeBytes,
CUpti_MetricID *  metricArray 
)
+
+
+ +

+Returns the metric IDs in metricArray for all CUDA-capable devices. The size of the metricArray buffer is given by *arraySizeBytes. The size of the metricArray buffer must be at least numMetrics * sizeof(CUpti_MetricID) or else not all metric IDs will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in metricArray.

+

Parameters:
+ + + +
arraySizeBytes The size of metricArray in bytes, and returns the number of bytes written to metricArray
metricArray Returns the IDs of the metrics
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if arraySizeBytes or metricArray are NULL
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiGetNumMetrics (uint32_t *  numMetrics  ) 
+
+
+ +

+Returns the total number of metrics available on any CUDA-capable device.

+

Parameters:
+ + +
numMetrics Returns the number of metrics
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if numMetrics is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricCreateEventGroupSets (CUcontext  context,
size_t  metricIdArraySizeBytes,
CUpti_MetricID *  metricIdArray,
CUpti_EventGroupSets **  eventGroupPasses 
)
+
+
+ +

+For a set of metrics, get the grouping that indicates the number of passes and the event groups necessary to collect the events required for those metrics.

+

See also:
cuptiEventGroupSetsCreate for details on event group set creation.
+
Parameters:
+ + + + + +
context The context for event collection
metricIdArraySizeBytes Size of the metricIdArray in bytes
metricIdArray Array of metric IDs
eventGroupPasses Returns a CUpti_EventGroupSets object that indicates the number of passes required to collect the events and the events to collect on each pass
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_CONTEXT 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_PARAMETER if metricIdArray or eventGroupPasses is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricEnumEvents (CUpti_MetricID  metric,
size_t *  eventIdArraySizeBytes,
CUpti_EventID *  eventIdArray 
)
+
+
+ +

+Gets the event IDs in eventIdArray required to calculate a metric. The size of the eventIdArray buffer is given by *eventIdArraySizeBytes and must be at least numEvents * sizeof(CUpti_EventID) or else not all events will be returned. The value returned in *eventIdArraySizeBytes contains the number of bytes returned in eventIdArray.

+

Parameters:
+ + + + +
metric ID of the metric
eventIdArraySizeBytes The size of eventIdArray in bytes, and returns the number of bytes written to eventIdArray
eventIdArray Returns the IDs of the events required to calculate metric
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_PARAMETER if eventIdArraySizeBytes or eventIdArray are NULL.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricEnumProperties (CUpti_MetricID  metric,
size_t *  propIdArraySizeBytes,
CUpti_MetricPropertyID *  propIdArray 
)
+
+
+ +

+Gets the property IDs in propIdArray required to calculate a metric. The size of the propIdArray buffer is given by *propIdArraySizeBytes and must be at least numProp * sizeof(CUpti_MetricPropertyID) or else not all properties will be returned. The value returned in *propIdArraySizeBytes contains the number of bytes returned in propIdArray.

+

Parameters:
+ + + + +
metric ID of the metric
propIdArraySizeBytes The size of propIdArray in bytes, and returns the number of bytes written to propIdArray
propIdArray Returns the IDs of the properties required to calculate metric
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_PARAMETER if propIdArraySizeBytes or propIdArray are NULL.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetAttribute (CUpti_MetricID  metric,
CUpti_MetricAttribute  attrib,
size_t *  valueSize,
void *  value 
)
+
+
+ +

+Returns a metric attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value.

+If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte.

+

Parameters:
+ + + + + +
metric ID of the metric
attrib The metric attribute to read
valueSize The size of the value buffer in bytes, and returns the number of bytes written to value
value Returns the attribute's value
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_PARAMETER if valueSize or value is NULL, or if attrib is not a metric attribute
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetIdFromName (CUdevice  device,
const char *  metricName,
CUpti_MetricID metric 
)
+
+
+ +

+Find a metric by name and return the metric ID in *metric.

+

Parameters:
+ + + + +
device The CUDA device
metricName The name of metric to find
metric Returns the ID of the found metric or undefined if unable to find the metric
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_DEVICE 
CUPTI_ERROR_INVALID_METRIC_NAME if unable to find a metric with name metricName. In this case *metric is undefined
CUPTI_ERROR_INVALID_PARAMETER if metricName or metric are NULL.
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetNumEvents (CUpti_MetricID  metric,
uint32_t *  numEvents 
)
+
+
+ +

+Returns the number of events in numEvents that are required to calculate a metric.

+

Parameters:
+ + + +
metric ID of the metric
numEvents Returns the number of events required for the metric
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_PARAMETER if numEvents is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetNumProperties (CUpti_MetricID  metric,
uint32_t *  numProp 
)
+
+
+ +

+Returns the number of properties in numProp that are required to calculate a metric.

+

Parameters:
+ + + +
metric ID of the metric
numProp Returns the number of properties required for the metric
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_PARAMETER if numProp is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetRequiredEventGroupSets (CUcontext  context,
CUpti_MetricID  metric,
CUpti_EventGroupSets **  eventGroupSets 
)
+
+
+ +

+For a metric get the groups of events that must be collected in the same pass to ensure that the metric is calculated correctly. If the events are not collected as specified then the metric value may be inaccurate.

+The function returns NULL if a metric does not have any required event group. In this case the events needed for the metric can be grouped in any manner for collection.

+

Parameters:
+ + + + +
context The context for event collection
metric The metric ID
eventGroupSets Returns a CUpti_EventGroupSets object that indicates the events that must be collected in the same pass to ensure the metric is calculated correctly. Returns NULL if no grouping is required for metric
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetValue (CUdevice  device,
CUpti_MetricID  metric,
size_t  eventIdArraySizeBytes,
CUpti_EventID eventIdArray,
size_t  eventValueArraySizeBytes,
uint64_t *  eventValueArray,
uint64_t  timeDuration,
CUpti_MetricValue metricValue 
)
+
+
+ +

+Use the events collected for a metric to calculate the metric value. Metric value evaluation depends on the evaluation mode CUpti_MetricEvaluationMode that the metric supports. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that the input event value is for one domain instance. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, it assumes that input event values are normalized to represent all domain instances on a device. For the most accurate metric collection, the events required for the metric should be collected for all profiled domain instances. For example, to collect all instances of an event, set the CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on the group containing the event to 1. The normalized value for the event is then: (sum_event_values * totalInstanceCount) / instanceCount, where sum_event_values is the summation of the event values across all profiled domain instances, totalInstanceCount is obtained from querying CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and instanceCount is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT).

+

Parameters:
+ + + + + + + + + +
device The CUDA device that the metric is being calculated for
metric The metric ID
eventIdArraySizeBytes The size of eventIdArray in bytes
eventIdArray The event IDs required to calculate metric
eventValueArraySizeBytes The size of eventValueArray in bytes
eventValueArray The normalized event values required to calculate metric. The values must be ordered to match the order of events in eventIdArray
timeDuration The duration over which the events were collected, in ns
metricValue Returns the value for the metric
+
+
Return values:
+ + + + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_OPERATION 
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the eventIdArray does not contain all the events needed for metric
CUPTI_ERROR_INVALID_EVENT_VALUE if any of the event values required for the metric is CUPTI_EVENT_OVERFLOW
CUPTI_ERROR_INVALID_METRIC_VALUE if the computed metric value cannot be represented in the metric's value type. For example, if the metric value type is unsigned and the computed metric value is negative
CUPTI_ERROR_INVALID_PARAMETER if metricValue, eventIdArray or eventValueArray is NULL
+
+ +
+

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUptiResult cuptiMetricGetValue2 (CUpti_MetricID  metric,
size_t  eventIdArraySizeBytes,
CUpti_EventID eventIdArray,
size_t  eventValueArraySizeBytes,
uint64_t *  eventValueArray,
size_t  propIdArraySizeBytes,
CUpti_MetricPropertyID propIdArray,
size_t  propValueArraySizeBytes,
uint64_t *  propValueArray,
CUpti_MetricValue metricValue 
)
+
+
+ +

+Use the events and properties collected for a metric to calculate the metric value. Metric value evaluation depends on the evaluation mode CUpti_MetricEvaluationMode that the metric supports. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that the input event value is for one domain instance. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, it assumes that input event values are normalized to represent all domain instances on a device. For the most accurate metric collection, the events required for the metric should be collected for all profiled domain instances. For example, to collect all instances of an event, set the CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on the group containing the event to 1. The normalized value for the event is then: (sum_event_values * totalInstanceCount) / instanceCount, where sum_event_values is the summation of the event values across all profiled domain instances, totalInstanceCount is obtained from querying CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and instanceCount is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT).

+

Parameters:
+ + + + + + + + + + + +
metric The metric ID
eventIdArraySizeBytes The size of eventIdArray in bytes
eventIdArray The event IDs required to calculate metric
eventValueArraySizeBytes The size of eventValueArray in bytes
eventValueArray The normalized event values required to calculate metric. The values must be ordered to match the order of events in eventIdArray
propIdArraySizeBytes The size of propIdArray in bytes
propIdArray The metric property IDs required to calculate metric
propValueArraySizeBytes The size of propValueArray in bytes
propValueArray The metric property values required to calculate metric. The values must be ordered to match the order of metric properties in propIdArray
metricValue Returns the value for the metric
+
+
Return values:
+ + + + + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_NOT_INITIALIZED 
CUPTI_ERROR_INVALID_METRIC_ID 
CUPTI_ERROR_INVALID_OPERATION 
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the eventIdArray does not contain all the events needed for metric
CUPTI_ERROR_INVALID_EVENT_VALUE if any of the event values required for the metric is CUPTI_EVENT_OVERFLOW
CUPTI_ERROR_NOT_COMPATIBLE if the computed metric value cannot be represented in the metric's value type. For example, if the metric value type is unsigned and the computed metric value is negative
CUPTI_ERROR_INVALID_PARAMETER if metricValue, eventIdArray or eventValueArray is NULL
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__PCSAMPLING__API.html b/doc/Cupti/group__CUPTI__PCSAMPLING__API.html new file mode 100644 index 0000000000000000000000000000000000000000..193f1fda59316b5b7d8a628477fd2a63d7b1ec17 --- /dev/null +++ b/doc/Cupti/group__CUPTI__PCSAMPLING__API.html @@ -0,0 +1,646 @@ + + +Cupti: CUPTI PC Sampling API + + + + + +
+

CUPTI PC Sampling API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  CUpti_GetCubinCrcParams
 Params for cuptiGetCubinCrc. More...
struct  CUpti_GetSassToSourceCorrelationParams
 Params for cuptiGetSassToSourceCorrelation. More...
struct  CUpti_PCSamplingConfigurationInfo
 PC sampling configuration information structure. More...
struct  CUpti_PCSamplingConfigurationInfoParams
 PC sampling configuration structure. More...
struct  CUpti_PCSamplingData
 Collected PC Sampling data. More...
struct  CUpti_PCSamplingDisableParams
 Params for cuptiPCSamplingDisable. More...
struct  CUpti_PCSamplingEnableParams
 Params for cuptiPCSamplingEnable. More...
struct  CUpti_PCSamplingGetDataParams
 Params for cuptiPCSamplingGetData. More...
struct  CUpti_PCSamplingGetNumStallReasonsParams
 Params for cuptiPCSamplingGetNumStallReasons. More...
struct  CUpti_PCSamplingGetStallReasonsParams
 Params for cuptiPCSamplingGetStallReasons. More...
struct  CUpti_PCSamplingPCData
 PC Sampling data. More...
struct  CUpti_PCSamplingStallReason
 PC Sampling stall reasons. More...
struct  CUpti_PCSamplingStartParams
 Params for cuptiPCSamplingStart. More...
struct  CUpti_PCSamplingStopParams
 Params for cuptiPCSamplingStop. More...

Typedefs

typedef void(* CUpti_ComputeCrcCallbackFunc )(const void *cubin, size_t cubinSize, uint64_t *cubinCrc)
 Function type for callback used by CUPTI to request crc of loaded module.

Enumerations

enum  CUpti_PCSamplingCollectionMode {
+  CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID = 0, +
+  CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS = 1, +
+  CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED = 2 +
+ }
 PC Sampling collection mode. More...
enum  CUpti_PCSamplingConfigurationAttributeType { ,
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD = 1, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON = 2, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE = 3, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE = 4, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE = 5, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL = 6, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT = 7, +
+  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER = 8 +
+ }
 PC Sampling configuration attributes. More...
enum  CUpti_PCSamplingOutputDataFormat { , CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED = 1 + }
 PC Sampling output data format. More...

Functions

CUptiResult cuptiGetCubinCrc (CUpti_GetCubinCrcParams *pParams)
 Get the CRC of cubin.
CUptiResult cuptiGetSassToSourceCorrelation (CUpti_GetSassToSourceCorrelationParams *pParams)
 SASS to Source correlation.
CUptiResult cuptiPCSamplingDisable (CUpti_PCSamplingDisableParams *pParams)
 Disable PC sampling.
CUptiResult cuptiPCSamplingEnable (CUpti_PCSamplingEnableParams *pParams)
 Enable PC sampling.
CUptiResult cuptiPCSamplingGetConfigurationAttribute (CUpti_PCSamplingConfigurationInfoParams *pParams)
 Read PC Sampling configuration attribute.
CUptiResult cuptiPCSamplingGetData (CUpti_PCSamplingGetDataParams *pParams)
 Flush GPU PC sampling data periodically.
CUptiResult cuptiPCSamplingGetNumStallReasons (CUpti_PCSamplingGetNumStallReasonsParams *pParams)
 Get PC sampling stall reason count.
CUptiResult cuptiPCSamplingGetStallReasons (CUpti_PCSamplingGetStallReasonsParams *pParams)
 Get PC sampling stall reasons.
CUptiResult cuptiPCSamplingSetConfigurationAttribute (CUpti_PCSamplingConfigurationInfoParams *pParams)
 Write PC Sampling configuration attribute.
CUptiResult cuptiPCSamplingStart (CUpti_PCSamplingStartParams *pParams)
 Start PC sampling.
CUptiResult cuptiPCSamplingStop (CUpti_PCSamplingStopParams *pParams)
 Stop PC sampling.
CUptiResult cuptiRegisterComputeCrcCallback (CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc)
 Register callback function with CUPTI to use your own algorithm to compute cubin crc.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI PC Sampling API.

Typedef Documentation

+ +
+
+ + + + +
typedef void( * CUpti_ComputeCrcCallbackFunc)(const void *cubin, size_t cubinSize, uint64_t *cubinCrc)
+
+
+ +

+This callback function asks for the crc of the module passed to it. The provided crc will be stored in PC sampling records, i.e. in the field 'cubinCrc' of the PC sampling struct CUpti_PCSamplingPCData. The CRC is used during the offline source correlation to uniquely identify the module.

+

Parameters:
+ + + + +
cubin The pointer to cubin binary
cubinSize The size of cubin binary.
cubinCrc Returns the computed crc of cubin.
+
+ +
+

+


Enumeration Type Documentation

+ +
+ +
+ +

+

Enumerator:
+ + + + +
CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID  +INVALID Value
CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS  +Continuous mode. Kernels are not serialized in this mode.
CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED  +Serialized mode. Kernels are serialized in this mode.
+
+ +
+

+ +

+ +
+ +

+PC Sampling configuration attribute types. These attributes can be read using cuptiPCSamplingGetConfigurationAttribute and can be written using cuptiPCSamplingSetConfigurationAttribute. Attributes marked [r] can only be read using cuptiPCSamplingGetConfigurationAttribute [w] can only be written using cuptiPCSamplingSetConfigurationAttribute [rw] can be read using cuptiPCSamplingGetConfigurationAttribute and written using cuptiPCSamplingSetConfigurationAttribute

Enumerator:
+ + + + + + + + + +
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD  +[rw] Sampling period for PC Sampling. DEFAULT - CUPTI defined value based on number of SMs Valid values for the sampling periods are between 5 to 31 both inclusive. This will set the sampling period to (2^samplingPeriod) cycles. For e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31 Value is a uint32_t
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON  +[w] Number of stall reasons to collect. DEFAULT - All stall reasons will be collected Value is a size_t [w] Stall reasons to collect DEFAULT - All stall reasons will be collected Input value should be a pointer pointing to array of stall reason indexes containing all the stall reason indexes to collect.
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE  +[rw] Size of SW buffer for raw PC counter data downloaded from HW buffer DEFAULT - 1 MB, which can accommodate approximately 5500 PCs with all stall reasons Approximately it takes 16 Bytes (and some fixed size memory) to accommodate one PC with one stall reason For e.g. 1 PC with 1 stall reason = 32 Bytes 1 PC with 2 stall reason = 48 Bytes 1 PC with 4 stall reason = 96 Bytes Value is a size_t
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE  +[rw] Size of HW buffer in bytes. DEFAULT - 512 MB. If the sampling period is too small, the HW buffer can overflow and drop PC data. Value is a size_t
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE  +[rw] PC Sampling collection mode DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS Input value should be of type CUpti_PCSamplingCollectionMode.
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL  +[rw] Control over PC Sampling data collection range Default - 0 1 - Allows user to start and stop PC Sampling using APIs - cuptiPCSamplingStart() - Start PC Sampling cuptiPCSamplingStop() - Stop PC Sampling Value is a uint32_t
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT  +[w] Value for output data format Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED Input value should be of type CUpti_PCSamplingOutputDataFormat.
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER  +[w] Data buffer to hold collected PC Sampling data PARSED_DATA Default - none. Buffer type is void * which can point to PARSED_DATA Refer CUpti_PCSamplingData for buffer format for PARSED_DATA
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + +
CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED  +HW buffer data will be parsed during collection of data
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + +
CUptiResult cuptiGetCubinCrc (CUpti_GetCubinCrcParams pParams  ) 
+
+
+ +

+This function returns the CRC of provided cubin binary.

+

Parameters:
+ + +
Refer CUpti_GetCubinCrcParams
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if parameter cubin is NULL or provided cubinSize is zero or size field is not set.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiGetSassToSourceCorrelation (CUpti_GetSassToSourceCorrelationParams pParams  ) 
+
+
+ +

+

Parameters:
+ + +
Refer CUpti_GetSassToSourceCorrelationParams
+
+The user is expected to free the memory allocated for fileName and dirName after use.

+

Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if either of the parameters cubin or functionName is NULL or cubinSize is zero or size field is not set correctly.
CUPTI_ERROR_INVALID_MODULE provided cubin is invalid.
CUPTI_ERROR_UNKNOWN an internal error occurred. This error code is also used for cases when the function is not present in the module. A better error code will be returned in the future release.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingDisable (CUpti_PCSamplingDisableParams pParams  ) 
+
+
+ +

+For application which doesn't destroy the CUDA context explicitly, this API does the PC Sampling tear-down, joins threads and copies PC records in the buffer provided during the PC sampling configuration. PC records which can't be accommodated in the buffer are discarded.

+

Parameters:
+ + +
Refer CUpti_PCSamplingDisableParams
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingEnable (CUpti_PCSamplingEnableParams pParams  ) 
+
+
+ +

+

Parameters:
+ + +
Refer CUpti_PCSamplingEnableParams
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingGetConfigurationAttribute (CUpti_PCSamplingConfigurationInfoParams pParams  ) 
+
+
+ +

+

Parameters:
+ + +
pParams A pointer to CUpti_PCSamplingConfigurationInfoParams containing PC sampling configuration.
+
+
Return values:
+ + + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_OPERATION if this API is called with some invalid attribute.
CUPTI_ERROR_INVALID_PARAMETER if attrib is not valid or any pParams is not valid
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT indicates that the value buffer is too small to hold the attribute value
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingGetData (CUpti_PCSamplingGetDataParams pParams  ) 
+
+
+ +

+Flushing of GPU PC Sampling data is required at the following points to maintain uniqueness of PCs: for CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, after every module load-unload-load; for CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends; if configuration option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL is enabled, then after every range end, i.e. cuptiPCSamplingStop(). If the application is profiled in CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS with CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL disabled, and there is no module unload, the user can collect data in two ways: (1) use the cuptiPCSamplingGetData() API periodically, or (2) use cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from the sampling data buffer passed during configuration. Note: In case the cuptiPCSamplingGetData() API is not called periodically, the sampling data buffer passed during configuration should be large enough to hold all PCs data. The cuptiPCSamplingGetData() API never does device synchronization. It is possible that when the API is called there is some unconsumed data from the HW buffer. In this case CUPTI provides only the data available with it at that moment.

Parameters:
+ + +
Refer CUpti_PCSamplingGetDataParams
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_OPERATION if this API is called without enabling PC sampling.
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingGetNumStallReasons (CUpti_PCSamplingGetNumStallReasonsParams pParams  ) 
+
+
+ +

+

Parameters:
+ + +
Refer CUpti_PCSamplingGetNumStallReasonsParams
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingGetStallReasons (CUpti_PCSamplingGetStallReasonsParams pParams  ) 
+
+
+ +

+

Parameters:
+ + +
Refer CUpti_PCSamplingGetStallReasonsParams
+
+
Return values:
+ + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingSetConfigurationAttribute (CUpti_PCSamplingConfigurationInfoParams pParams  ) 
+
+
+ +

+

Parameters:
+ + +
pParams A pointer to CUpti_PCSamplingConfigurationInfoParams containing PC sampling configuration.
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_OPERATION if this API is called with some invalid attrib.
CUPTI_ERROR_INVALID_PARAMETER if attribute value is not valid or any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingStart (CUpti_PCSamplingStartParams pParams  ) 
+
+
+ +

+User can collect PC Sampling data for user-defined range specified by Start/Stop APIs. This API can be used to mark starting of range. Set configuration option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.

Parameters:
+ + +
Refer CUpti_PCSamplingStartParams
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_OPERATION if this API is called with incorrect PC Sampling configuration.
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiPCSamplingStop (CUpti_PCSamplingStopParams pParams  ) 
+
+
+ +

+User can collect PC Sampling data for user-defined range specified by Start/Stop APIs. This API can be used to mark end of range. Set configuration option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.

Parameters:
+ + +
Refer CUpti_PCSamplingStopParams
+
+
Return values:
+ + + + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_OPERATION if this API is called with incorrect PC Sampling configuration.
CUPTI_ERROR_INVALID_PARAMETER if any pParams is not valid
CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device does not support the API
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiRegisterComputeCrcCallback (CUpti_ComputeCrcCallbackFunc  funcComputeCubinCrc  ) 
+
+
+ +

+This function registers a callback function and it gets called from CUPTI when a CUDA module is loaded.

+

Parameters:
+ + +
funcComputeCubinCrc callback is invoked when a CUDA module is loaded.
+
+
Return values:
+ + + +
CUPTI_SUCCESS 
CUPTI_ERROR_INVALID_PARAMETER if funcComputeCubinCrc is NULL.
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__PCSAMPLING__UTILITY.html b/doc/Cupti/group__CUPTI__PCSAMPLING__UTILITY.html new file mode 100644 index 0000000000000000000000000000000000000000..ed538669e6b33c621e5538f6d292a5911a15f461 --- /dev/null +++ b/doc/Cupti/group__CUPTI__PCSAMPLING__UTILITY.html @@ -0,0 +1,305 @@ + + +Cupti: CUPTI PC Sampling Utility API + + + + + +
+

CUPTI PC Sampling Utility API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  BufferInfo
 BufferInfo will be stored in the file for every buffer i.e for every call of UtilDumpPcSamplingBufferInFile() API. More...
struct  CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams
 Params for CuptiUtilGetBufferInfo. More...
struct  CUPTI::PcSamplingUtil::CUptiUtil_GetHeaderDataParams
 Params for CuptiUtilGetHeaderData. More...
struct  CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams
 Params for CuptiUtilGetPcSampData. More...
struct  CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams
 Params for CuptiUtilMergePcSampData. More...
struct  CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams
 Params for CuptiUtilPutPcSampData. More...
struct  Header
 Header info will be stored in file. More...
struct  PcSamplingStallReasons
 All available stall reasons name and respective indexes will be stored in it. More...

Enumerations

enum  CUPTI::PcSamplingUtil::CUptiUtilResult {
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_SUCCESS = 0, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_INVALID_PARAMETER = 1, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE = 2, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE = 3, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED = 4, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED = 5, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED = 6, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_OUT_OF_MEMORY = 7, +
+  CUPTI::PcSamplingUtil::CUPTI_UTIL_ERROR_UNKNOWN = 999 +
+ }
 CUPTI PC sampling utility API result codes. More...
enum  CUPTI::PcSamplingUtil::PcSamplingBufferType {
+  CUPTI::PcSamplingUtil::PC_SAMPLING_BUFFER_INVALID = 0, +
+  CUPTI::PcSamplingUtil::PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA = 1 +
+ }

Functions

CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetBufferInfo (CUptiUtil_GetBufferInfoParams *pParams)
 Get buffer info data of file.
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetHeaderData (CUptiUtil_GetHeaderDataParams *pParams)
 Get header data of file.
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetPcSampData (CUptiUtil_GetPcSampDataParams *pParams)
 Retrieve PC sampling data from file into allocated buffer.
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilMergePcSampData (CUptiUtil_MergePcSampDataParams *pParams)
 Merge PC sampling data range id wise.
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilPutPcSampData (CUptiUtil_PutPcSampDataParams *pParams)
 Dump PC sampling data into the file.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI PC Sampling Utility API.

Enumeration Type Documentation

+ +
+ +
+ +

+Error and result codes returned by CUPTI PC sampling utility API.

Enumerator:
+ + + + + + + + + + +
CUPTI_UTIL_SUCCESS  +No error
CUPTI_UTIL_ERROR_INVALID_PARAMETER  +One or more of the parameters are invalid.
CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE  +Unable to create a new file
CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE  +Unable to open a file
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED  +Read or write operation failed
CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED  +Provided file handle is corrupted.
CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED  +seek operation failed.
CUPTI_UTIL_ERROR_OUT_OF_MEMORY  +Unable to allocate enough memory to perform the requested operation.
CUPTI_UTIL_ERROR_UNKNOWN  +An unknown internal error has occurred.
+
+ +
+

+ +

+ +
+ +

+

Enumerator:
+ + + +
PC_SAMPLING_BUFFER_INVALID  +Invalid buffer type.
PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA  +Refers to CUpti_PCSamplingData buffer.
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + +
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetBufferInfo (CUptiUtil_GetBufferInfoParams *  pParams  ) 
+
+
+ +

+This API must be called every time before calling the CuptiUtilGetPcSampData API. The BufferInfo structure gives info about recordCount and stallReasonCount of every record in the buffer. This helps to allocate a buffer of the exact size to retrieve the data into.

+

Return values:
+ + + + + +
CUPTI_UTIL_SUCCESS 
CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParams or fileHandle is NULL or param struct size is incorrect.
CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetHeaderData (CUptiUtil_GetHeaderDataParams *  pParams  ) 
+
+
+ +

+This API must be called once initially while retrieving data from file. The Header structure gives info about the total number of buffers present in the file.

+

Return values:
+ + + + + +
CUPTI_UTIL_SUCCESS 
CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParams or fileHandle is NULL or param struct size is incorrect.
CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetPcSampData (CUptiUtil_GetPcSampDataParams *  pParams  ) 
+
+
+ +

+This API must be called after CuptiUtilGetBufferInfo API. It will retrieve data from file into allocated buffer.

+

Return values:
+ + + + + +
CUPTI_UTIL_SUCCESS 
CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid or if either of pSampData, pParams is NULL. If pPcSamplingStallReasons is not NULL then error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL, or filename is empty.
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED 
CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilMergePcSampData (CUptiUtil_MergePcSampDataParams *  pParams  ) 
+
+
+ +

+This API merges PC sampling data range id wise. It allocates memory for the merged data, fills the data into it and provides the buffer pointer in the MergedPcSampDataBuffers field. The user is expected to free the merged data buffers after use.

+

Return values:
+ + + + +
CUPTI_UTIL_SUCCESS 
CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if param struct size is invalid or count of buffers to merge is invalid i.e less than 1 or either of PcSampDataBuffer, MergedPcSampDataBuffers, numMergedBuffer is NULL
CUPTI_UTIL_ERROR_OUT_OF_MEMORY Unable to allocate memory for merged buffer.
+
+ +
+

+ +

+
+ + + + + + + + + +
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilPutPcSampData (CUptiUtil_PutPcSampDataParams *  pParams  ) 
+
+
+ +

+This API can be called multiple times. It will append the buffer to the file. For every buffer it will store BufferInfo so that before retrieving data it will help to allocate a buffer to store the retrieved data. This API creates the file if it does not exist. If the stallReasonIndex or stallReasons pointer of CUptiUtil_PutPcSampDataParams is NULL then stall reasons data will not be stored in the file. It is expected to store all available stall reason data at least once to refer to it during offline correlation.

+

Return values:
+ + + + + + +
CUPTI_UTIL_SUCCESS 
CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details not provided or filename is empty.
CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE 
CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE 
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED 
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__PROFILER__API.html b/doc/Cupti/group__CUPTI__PROFILER__API.html new file mode 100644 index 0000000000000000000000000000000000000000..60a1ea8500a276199e216d99666b0a9fcd2fa045 --- /dev/null +++ b/doc/Cupti/group__CUPTI__PROFILER__API.html @@ -0,0 +1,567 @@ + + +Cupti: CUPTI Profiling API + + + + + +
+

CUPTI Profiling API

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  CUpti_Profiler_BeginPass_Params
 Params for cuptiProfilerBeginPass. More...
struct  CUpti_Profiler_BeginSession_Params
 Params for cuptiProfilerBeginSession. More...
struct  CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
 Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize. More...
struct  CUpti_Profiler_CounterDataImage_CalculateSize_Params
 Params for cuptiProfilerCounterDataImageCalculateSize. More...
struct  CUpti_Profiler_CounterDataImage_Initialize_Params
 Params for cuptiProfilerCounterDataImageInitialize. More...
struct  CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
 Params for cuptiProfilerCounterDataImageInitializeScratchBuffer. More...
struct  CUpti_Profiler_CounterDataImageOptions
 Input parameter to define the counterDataImage. More...
struct  CUpti_Profiler_DeInitialize_Params
 Default parameter for cuptiProfilerDeInitialize. More...
struct  CUpti_Profiler_DeviceSupported_Params
 Params for cuptiProfilerDeviceSupported. More...
struct  CUpti_Profiler_DisableProfiling_Params
 Params for cuptiProfilerDisableProfiling. More...
struct  CUpti_Profiler_EnableProfiling_Params
 Params for cuptiProfilerEnableProfiling. More...
struct  CUpti_Profiler_EndPass_Params
 Params for cuptiProfilerEndPass. More...
struct  CUpti_Profiler_EndSession_Params
 Params for cuptiProfilerEndSession. More...
struct  CUpti_Profiler_FlushCounterData_Params
 Params for cuptiProfilerFlushCounterData. More...
struct  CUpti_Profiler_GetCounterAvailability_Params
 Params for cuptiProfilerGetCounterAvailability. More...
struct  CUpti_Profiler_Initialize_Params
 Default parameter for cuptiProfilerInitialize. More...
struct  CUpti_Profiler_IsPassCollected_Params
 Params for cuptiProfilerIsPassCollected. More...
struct  CUpti_Profiler_SetConfig_Params
 Params for cuptiProfilerSetConfig. More...
struct  CUpti_Profiler_UnsetConfig_Params
 Params for cuptiProfilerUnsetConfig. More...

Enumerations

enum  CUpti_Profiler_Support_Level {
+  CUPTI_PROFILER_CONFIGURATION_UNKNOWN = 0, +
+  CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED, +
+  CUPTI_PROFILER_CONFIGURATION_DISABLED, +
+  CUPTI_PROFILER_CONFIGURATION_SUPPORTED +
+ }
 Generic support level enum for CUPTI. More...
enum  CUpti_ProfilerRange {
+  CUPTI_Range_INVALID, +
+  CUPTI_AutoRange, +
+  CUPTI_UserRange, +
+  CUPTI_Range_COUNT +
+ }
 Profiler range attribute. More...
enum  CUpti_ProfilerReplayMode {
+  CUPTI_Replay_INVALID, +
+  CUPTI_ApplicationReplay, +
+  CUPTI_KernelReplay, +
+  CUPTI_UserReplay, +
+  CUPTI_Replay_COUNT +
+ }
 Profiler replay attribute. More...

Functions

CUptiResult cuptiProfilerBeginPass (CUpti_Profiler_BeginPass_Params *pParams)
 Replay API: used for multipass collection.
CUptiResult cuptiProfilerBeginSession (CUpti_Profiler_BeginSession_Params *pParams)
 Begin profiling session sets up the profiling on the device.
CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize (CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params *pParams)
 A temporary storage for CounterData image needed for internal operations.
CUptiResult cuptiProfilerCounterDataImageCalculateSize (CUpti_Profiler_CounterDataImage_CalculateSize_Params *pParams)
 A CounterData image allocates space for values for each counter for each range.
+CUptiResult cuptiProfilerDeInitialize (CUpti_Profiler_DeInitialize_Params *pParams)
 DeInitializes the profiler interface.
CUptiResult cuptiProfilerDeviceSupported (CUpti_Profiler_DeviceSupported_Params *pParams)
 Query device compatibility with Profiling API.
CUptiResult cuptiProfilerDisableProfiling (CUpti_Profiler_DisableProfiling_Params *pParams)
 Disable Profiling.
CUptiResult cuptiProfilerEnableProfiling (CUpti_Profiler_EnableProfiling_Params *pParams)
 Enables Profiling.
CUptiResult cuptiProfilerEndPass (CUpti_Profiler_EndPass_Params *pParams)
 Replay API: used for multipass collection.
CUptiResult cuptiProfilerEndSession (CUpti_Profiler_EndSession_Params *pParams)
 Ends profiling session.
CUptiResult cuptiProfilerFlushCounterData (CUpti_Profiler_FlushCounterData_Params *pParams)
 Decode all the submitted passes.
CUptiResult cuptiProfilerGetCounterAvailability (CUpti_Profiler_GetCounterAvailability_Params *pParams)
 Query counter availability.
CUptiResult cuptiProfilerInitialize (CUpti_Profiler_Initialize_Params *pParams)
 Initializes the profiler interface.
+CUptiResult cuptiProfilerIsPassCollected (CUpti_Profiler_IsPassCollected_Params *pParams)
 Asynchronous call to query if the submitted pass to GPU is collected.
CUptiResult cuptiProfilerPopRange (CUpti_Profiler_PopRange_Params *pParams)
 Range API's : Pop user range.
CUptiResult cuptiProfilerPushRange (CUpti_Profiler_PushRange_Params *pParams)
 Range API's : Push user range.
CUptiResult cuptiProfilerSetConfig (CUpti_Profiler_SetConfig_Params *pParams)
 Set metrics configuration to be profiled.
+CUptiResult cuptiProfilerUnsetConfig (CUpti_Profiler_UnsetConfig_Params *pParams)
 Unset metrics configuration profiled.
+

Detailed Description

+Functions, types, and enums that implement the CUPTI Profiling API.

Enumeration Type Documentation

+ +
+ +
+ +

+

Enumerator:
+ + + + + +
CUPTI_PROFILER_CONFIGURATION_UNKNOWN  +Configuration support level unknown - either detection code errored out before setting this value, or unable to determine it.
CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED  +Profiling is unavailable. For specific feature fields, this means that the current configuration of this feature does not work with profiling. For instance, SLI-enabled devices do not support profiling, and this value would be returned for SLI on an SLI-enabled device.
CUPTI_PROFILER_CONFIGURATION_DISABLED  +Profiling would be available for this configuration, but was disabled by the system.
CUPTI_PROFILER_CONFIGURATION_SUPPORTED  +Profiling is supported. For specific feature fields, this means that the current configuration of this feature works with profiling. For instance, SLI-enabled devices do not support profiling, and this value would only be returned for devices which are not SLI-enabled.
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ProfilerRange
+
+
+ +

+A metric enabled in the session's configuration is collected separately per unique range-stack in the pass. This is an attribute to collect metrics around each kernel in a profiling session or in a user-defined range.

Enumerator:
+ + + + + +
CUPTI_Range_INVALID  +Invalid value
CUPTI_AutoRange  +Ranges are auto defined around each kernel in a profiling session
CUPTI_UserRange  +A range in which metric data to be collected is defined by the user
CUPTI_Range_COUNT  +Range count
+
+ +
+

+ +

+
+ + + + +
enum CUpti_ProfilerReplayMode
+
+
+ +

+For metrics which require multipass collection, a replay of the GPU kernel(s) is required. This is an attribute which specifies how the replay of the kernel(s) to be measured is done.

Enumerator:
+ + + + + + +
CUPTI_Replay_INVALID  +Invalid Value
CUPTI_ApplicationReplay  +Replay is done by CUPTI user around the process
CUPTI_KernelReplay  +Replay is done around kernel implicitly by CUPTI
CUPTI_UserReplay  +Replay is done by CUPTI user within a process
CUPTI_Replay_COUNT  +Replay count
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + +
CUptiResult cuptiProfilerBeginPass (CUpti_Profiler_BeginPass_Params pParams  ) 
+
+
+ +

+These APIs are used if user chooses to replay by itself CUPTI_UserReplay or CUPTI_ApplicationReplay for multipass collection of the metrics configurations. It's a no-op in case of CUPTI_KernelReplay. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerBeginSession (CUpti_Profiler_BeginSession_Params pParams  ) 
+
+
+ +

+Although it doesn't start the profiling, the GPU resources needed for profiling are allocated. Outside of a session, the GPU will return to its normal operating state. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize (CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params pParams  ) 
+
+
+ +

+Use these APIs to calculate the allocation size and initialize counterData image scratch buffer. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerCounterDataImageCalculateSize (CUpti_Profiler_CounterDataImage_CalculateSize_Params pParams  ) 
+
+
+ +

+The user bears the responsibility of managing the counterDataImage allocations. CounterDataPrefix contains meta data about the metrics that will be stored in counterDataImage. Use these APIs to calculate the allocation size and initialize counterData image. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerDeviceSupported (CUpti_Profiler_DeviceSupported_Params pParams  ) 
+
+
+ +

+Use this call to determine whether a compute device and configuration are compatible with the Profiling API. If the configuration does not support profiling, one of several flags will indicate why. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerDisableProfiling (CUpti_Profiler_DisableProfiling_Params pParams  ) 
+
+
+ +

+In CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in a profiling session. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerEnableProfiling (CUpti_Profiler_EnableProfiling_Params pParams  ) 
+
+
+ +

+In CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in a profiling session. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerEndPass (CUpti_Profiler_EndPass_Params pParams  ) 
+
+
+ +

+These APIs are used if user chooses to replay by itself CUPTI_UserReplay or CUPTI_ApplicationReplay for multipass collection of the metrics configurations. It's a no-op in case of CUPTI_KernelReplay. Returns information for next pass. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerEndSession (CUpti_Profiler_EndSession_Params pParams  ) 
+
+
+ +

+Frees up the GPU resources acquired for profiling. Outside of a session, the GPU will return to its normal operating state. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerFlushCounterData (CUpti_Profiler_FlushCounterData_Params pParams  ) 
+
+
+ +

+Flush Counter data API to ensure every pass is decoded into the counterDataImage passed at beginSession. This will cause the CPU/GPU sync to collect all the undecoded passes. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerGetCounterAvailability (CUpti_Profiler_GetCounterAvailability_Params pParams  ) 
+
+
+ +

+Use this API to query counter availability information in a buffer which can be used to filter unavailable raw metrics on host. Note: This API may fail, if any profiling or sampling session is active on the specified context or its device. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerInitialize (CUpti_Profiler_Initialize_Params pParams  ) 
+
+
+ +

+Loads the required libraries in the process address space. Sets up the hooks with the CUDA driver. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerPopRange (CUpti_Profiler_PopRange_Params *  pParams  ) 
+
+
+ +

+Counter data is collected per unique range-stack, identified by a string label passed by the user. It's an invalid operation in case of CUPTI_AutoRange. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerPushRange (CUpti_Profiler_PushRange_Params *  pParams  ) 
+
+
+ +

+Counter data is collected per unique range-stack, identified by a string label passed by the user. It's an invalid operation in case of CUPTI_AutoRange. +

+

+ +

+
+ + + + + + + + + +
CUptiResult cuptiProfilerSetConfig (CUpti_Profiler_SetConfig_Params pParams  ) 
+
+
+ +

+Use these APIs to set the config to profile in a session. It can be used for advanced cases such as where multiple configurations are collected into a single CounterData Image on the need basis, without restarting the session. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__RESULT__API.html b/doc/Cupti/group__CUPTI__RESULT__API.html new file mode 100644 index 0000000000000000000000000000000000000000..5bb56e261ff2b3d9ba367a75cfc9081c88d49a5e --- /dev/null +++ b/doc/Cupti/group__CUPTI__RESULT__API.html @@ -0,0 +1,276 @@ + + +Cupti: CUPTI Result Codes + + + + + +
+

CUPTI Result Codes

+ + + + + + + + + +

Enumerations

enum  CUptiResult {
+  CUPTI_SUCCESS = 0, +
+  CUPTI_ERROR_INVALID_PARAMETER = 1, +
+  CUPTI_ERROR_INVALID_DEVICE = 2, +
+  CUPTI_ERROR_INVALID_CONTEXT = 3, +
+  CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID = 4, +
+  CUPTI_ERROR_INVALID_EVENT_ID = 5, +
+  CUPTI_ERROR_INVALID_EVENT_NAME = 6, +
+  CUPTI_ERROR_INVALID_OPERATION = 7, +
+  CUPTI_ERROR_OUT_OF_MEMORY = 8, +
+  CUPTI_ERROR_HARDWARE = 9, +
+  CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT = 10, +
+  CUPTI_ERROR_API_NOT_IMPLEMENTED = 11, +
+  CUPTI_ERROR_MAX_LIMIT_REACHED = 12, +
+  CUPTI_ERROR_NOT_READY = 13, +
+  CUPTI_ERROR_NOT_COMPATIBLE = 14, +
+  CUPTI_ERROR_NOT_INITIALIZED = 15, +
+  CUPTI_ERROR_INVALID_METRIC_ID = 16, +
+  CUPTI_ERROR_INVALID_METRIC_NAME = 17, +
+  CUPTI_ERROR_QUEUE_EMPTY = 18, +
+  CUPTI_ERROR_INVALID_HANDLE = 19, +
+  CUPTI_ERROR_INVALID_STREAM = 20, +
+  CUPTI_ERROR_INVALID_KIND = 21, +
+  CUPTI_ERROR_INVALID_EVENT_VALUE = 22, +
+  CUPTI_ERROR_DISABLED = 23, +
+  CUPTI_ERROR_INVALID_MODULE = 24, +
+  CUPTI_ERROR_INVALID_METRIC_VALUE = 25, +
+  CUPTI_ERROR_HARDWARE_BUSY = 26, +
+  CUPTI_ERROR_NOT_SUPPORTED = 27, +
+  CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED = 28, +
+  CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE = 29, +
+  CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES = 30, +
+  CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS = 31, +
+  CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED = 32, +
+  CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 33, +
+  CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE = 34, +
+  CUPTI_ERROR_INSUFFICIENT_PRIVILEGES = 35, +
+  CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED = 36, +
+  CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE = 37, +
+  CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED = 38, +
+  CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 39, +
+  CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES = 40, +
+  CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED = 41, +
+  CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED = 42, +
+  CUPTI_ERROR_UNKNOWN = 999 +
+ }
 CUPTI result codes. More...

Functions

CUptiResult cuptiGetResultString (CUptiResult result, const char **str)
 Get the descriptive string for a CUptiResult.
+

Detailed Description

+Error and result codes returned by CUPTI functions.

Enumeration Type Documentation

+ +
+
+ + + + +
enum CUptiResult
+
+
+ +

+Error and result codes returned by CUPTI functions.

Enumerator:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUPTI_SUCCESS  +No error.
CUPTI_ERROR_INVALID_PARAMETER  +One or more of the parameters is invalid.
CUPTI_ERROR_INVALID_DEVICE  +The device does not correspond to a valid CUDA device.
CUPTI_ERROR_INVALID_CONTEXT  +The context is NULL or not valid.
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID  +The event domain id is invalid.
CUPTI_ERROR_INVALID_EVENT_ID  +The event id is invalid.
CUPTI_ERROR_INVALID_EVENT_NAME  +The event name is invalid.
CUPTI_ERROR_INVALID_OPERATION  +The current operation cannot be performed due to dependency on other factors.
CUPTI_ERROR_OUT_OF_MEMORY  +Unable to allocate enough memory to perform the requested operation.
CUPTI_ERROR_HARDWARE  +An error occurred on the performance monitoring hardware.
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT  +The output buffer size is not sufficient to return all requested data.
CUPTI_ERROR_API_NOT_IMPLEMENTED  +API is not implemented.
CUPTI_ERROR_MAX_LIMIT_REACHED  +The maximum limit is reached.
CUPTI_ERROR_NOT_READY  +The object is not yet ready to perform the requested operation.
CUPTI_ERROR_NOT_COMPATIBLE  +The current operation is not compatible with the current state of the object
CUPTI_ERROR_NOT_INITIALIZED  +CUPTI is unable to initialize its connection to the CUDA driver.
CUPTI_ERROR_INVALID_METRIC_ID  +The metric id is invalid.
CUPTI_ERROR_INVALID_METRIC_NAME  +The metric name is invalid.
CUPTI_ERROR_QUEUE_EMPTY  +The queue is empty.
CUPTI_ERROR_INVALID_HANDLE  +Invalid handle (internal?).
CUPTI_ERROR_INVALID_STREAM  +Invalid stream.
CUPTI_ERROR_INVALID_KIND  +Invalid kind.
CUPTI_ERROR_INVALID_EVENT_VALUE  +Invalid event value.
CUPTI_ERROR_DISABLED  +CUPTI is disabled due to conflicts with other enabled profilers
CUPTI_ERROR_INVALID_MODULE  +Invalid module.
CUPTI_ERROR_INVALID_METRIC_VALUE  +Invalid metric value.
CUPTI_ERROR_HARDWARE_BUSY  +The performance monitoring hardware is in use by other client.
CUPTI_ERROR_NOT_SUPPORTED  +The attempted operation is not supported on the current system or device.
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED  +Unified memory profiling is not supported on the system. Potential reason could be unsupported OS or architecture.
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE  +Unified memory profiling is not supported on the device
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES  +Unified memory profiling is not supported on a multi-GPU configuration without P2P support between any pair of devices
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS  +Unified memory profiling is not supported under the Multi-Process Service (MPS) environment. CUDA 7.5 removes this restriction.
CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED  +In CUDA 9.0, devices with compute capability 7.0 don't support CDP tracing
CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED  +Profiling on virtualized GPU is not supported.
CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE  +Profiling results might be incorrect for CUDA applications compiled with nvcc version older than 9.0 for devices with compute capability 6.0 and 6.1. Profiling session will continue and CUPTI will notify it using this error code. User is advised to recompile the application code with nvcc version 9.0 or later. Ignore this warning if code is already compiled with the recommended nvcc version.
CUPTI_ERROR_INSUFFICIENT_PRIVILEGES  +User doesn't have sufficient privileges which are required to start the profiling session. One possible reason for this may be that the NVIDIA driver or your system administrator may have restricted access to the NVIDIA GPU performance counters. To learn how to resolve this issue and find more information, please visit https://developer.nvidia.com/CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED  +Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and metric API from the header cupti_metrics.h are not compatible with the Profiling API in the header cupti_profiler_target.h and Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h.
CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE  +Missing definition of the OpenACC API routine in the linked OpenACC library.

+One possible reason is that OpenACC library is linked statically in the user application, which might not have the definition of all the OpenACC API routines needed for the OpenACC profiling, as compiler might ignore definitions for the functions not used in the application. This issue can be mitigated by linking the OpenACC library dynamically.

CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED  +Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and metric API from the header cupti_metrics.h are not supported on devices with compute capability 7.5 and higher (i.e. Turing and later GPU architectures). These API will be deprecated in a future CUDA release. These are replaced by Profiling API in the header cupti_profiler_target.h and Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h.
CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED  +CUPTI doesn't allow multiple callback subscribers. Only a single subscriber can be registered at a time. Same error code is used when application is launched using NVIDIA tools like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb and cuda-memcheck.
CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES  +Profiling on virtualized GPU is not allowed by hypervisor.
CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED  +Profiling and tracing are not allowed when confidential computing mode is enabled.
CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED  +CUPTI does not support NVIDIA Crypto Mining Processors (CMP). For more information, please visit https://developer.nvidia.com/ERR_NVCMPGPU
CUPTI_ERROR_UNKNOWN  +An unknown internal error has occurred.
+
+ +
+

+


Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + +
CUptiResult cuptiGetResultString (CUptiResult  result,
const char **  str 
)
+
+
+ +

+Return the descriptive string for a CUptiResult in *str.

Note:
Thread-safety: this function is thread safe.
+
Parameters:
+ + + +
result The result to get the string for
str Returns the string
+
+
Return values:
+ + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_INVALID_PARAMETER if str is NULL or result is not a valid CUptiResult
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/group__CUPTI__VERSION__API.html b/doc/Cupti/group__CUPTI__VERSION__API.html new file mode 100644 index 0000000000000000000000000000000000000000..9ef2a94f7be3f4a4a7a895fd35e09d959a56fd7b --- /dev/null +++ b/doc/Cupti/group__CUPTI__VERSION__API.html @@ -0,0 +1,86 @@ + + +Cupti: CUPTI Version + + + + + +
+

CUPTI Version

+ + + + + + + + + +

Defines

#define CUPTI_API_VERSION   18
 The API version for this implementation of CUPTI.

Functions

CUptiResult cuptiGetVersion (uint32_t *version)
 Get the CUPTI API version.
+

Detailed Description

+Function and macro to determine the CUPTI version.

Define Documentation

+ +
+
+ + + + +
#define CUPTI_API_VERSION   18
+
+
+ +

+The API version for this implementation of CUPTI. This define along with cuptiGetVersion can be used to dynamically detect if the version of CUPTI compiled against matches the version of the loaded CUPTI library.

+v1 : CUDAToolsSDK 4.0 v2 : CUDAToolsSDK 4.1 v3 : CUDA Toolkit 5.0 v4 : CUDA Toolkit 5.5 v5 : CUDA Toolkit 6.0 v6 : CUDA Toolkit 6.5 v7 : CUDA Toolkit 6.5(with sm_52 support) v8 : CUDA Toolkit 7.0 v9 : CUDA Toolkit 8.0 v10 : CUDA Toolkit 9.0 v11 : CUDA Toolkit 9.1 v12 : CUDA Toolkit 10.0, 10.1 and 10.2 v13 : CUDA Toolkit 11.0 v14 : CUDA Toolkit 11.1 v15 : CUDA Toolkit 11.2, 11.3 and 11.4 v16 : CUDA Toolkit 11.5 v17 : CUDA Toolkit 11.6 v18 : CUDA Toolkit 11.8 +

+

+


Function Documentation

+ +
+
+ + + + + + + + + +
CUptiResult cuptiGetVersion (uint32_t *  version  ) 
+
+
+ +

+Return the API version in *version.

+

Parameters:
+ + +
version Returns the version
+
+
Return values:
+ + + +
CUPTI_SUCCESS on success
CUPTI_ERROR_INVALID_PARAMETER if version is NULL
+
+
See also:
CUPTI_API_VERSION
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/index.html b/doc/Cupti/index.html new file mode 100644 index 0000000000000000000000000000000000000000..5fe449c7dc3ac08dddd401f9e5fe2019dda8c9bb --- /dev/null +++ b/doc/Cupti/index.html @@ -0,0 +1,1177 @@ + + + + + + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + --> + + + + +
+ +
+ +
+ +
+
+

CUPTI

+
+

The API reference guide for CUPTI, the CUDA Profiling Tools + Interface. + +

+

The CUPTI API.

+
+
+
+

Table of Contents

+
+ +
+
+
+
+
+
+ \ No newline at end of file diff --git a/doc/Cupti/modules.html b/doc/Cupti/modules.html new file mode 100644 index 0000000000000000000000000000000000000000..831c51dad3a0627a8ab5bdd0de2dabb0da0b562a --- /dev/null +++ b/doc/Cupti/modules.html @@ -0,0 +1,10218 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

5. Modules

+ +
+

5.1. CUPTI Result Codes

+
+

Error and result codes returned by CUPTI functions.

+
+

Enumerations

+
+
enum CUptiResult
+
CUPTI result codes.
+
+

Functions

+
+
CUptiResult cuptiGetResultString ( CUptiResult result, const char** str )
+
Get the descriptive string for a CUptiResult.
+
+
+

Enumerations

+
+
+ enum CUptiResult
+
+
+

Error and result codes returned by CUPTI functions.

+
+
+
+ Values + +
+
+
CUPTI_SUCCESS = 0
+
No error.
+
CUPTI_ERROR_INVALID_PARAMETER = 1
+
One or more of the parameters is invalid.
+
CUPTI_ERROR_INVALID_DEVICE = 2
+
The device does not correspond to a valid CUDA device.
+
CUPTI_ERROR_INVALID_CONTEXT = 3
+
The context is NULL or not valid.
+
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID = 4
+
The event domain id is invalid.
+
CUPTI_ERROR_INVALID_EVENT_ID = 5
+
The event id is invalid.
+
CUPTI_ERROR_INVALID_EVENT_NAME = 6
+
The event name is invalid.
+
CUPTI_ERROR_INVALID_OPERATION = 7
+
The current operation cannot be performed due to dependency on other factors.
+
CUPTI_ERROR_OUT_OF_MEMORY = 8
+
Unable to allocate enough memory to perform the requested operation.
+
CUPTI_ERROR_HARDWARE = 9
+
An error occurred on the performance monitoring hardware.
+
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT = 10
+
The output buffer size is not sufficient to return all requested data.
+
CUPTI_ERROR_API_NOT_IMPLEMENTED = 11
+
API is not implemented.
+
CUPTI_ERROR_MAX_LIMIT_REACHED = 12
+
The maximum limit is reached.
+
CUPTI_ERROR_NOT_READY = 13
+
The object is not yet ready to perform the requested operation.
+
CUPTI_ERROR_NOT_COMPATIBLE = 14
+
The current operation is not compatible with the current state of the object
+
CUPTI_ERROR_NOT_INITIALIZED = 15
+
CUPTI is unable to initialize its connection to the CUDA driver.
+
CUPTI_ERROR_INVALID_METRIC_ID = 16
+
The metric id is invalid.
+
CUPTI_ERROR_INVALID_METRIC_NAME = 17
+
The metric name is invalid.
+
CUPTI_ERROR_QUEUE_EMPTY = 18
+
The queue is empty.
+
CUPTI_ERROR_INVALID_HANDLE = 19
+
Invalid handle (internal?).
+
CUPTI_ERROR_INVALID_STREAM = 20
+
Invalid stream.
+
CUPTI_ERROR_INVALID_KIND = 21
+
Invalid kind.
+
CUPTI_ERROR_INVALID_EVENT_VALUE = 22
+
Invalid event value.
+
CUPTI_ERROR_DISABLED = 23
+
CUPTI is disabled due to conflicts with other enabled profilers
+
CUPTI_ERROR_INVALID_MODULE = 24
+
Invalid module.
+
CUPTI_ERROR_INVALID_METRIC_VALUE = 25
+
Invalid metric value.
+
CUPTI_ERROR_HARDWARE_BUSY = 26
+
The performance monitoring hardware is in use by other client.
+
CUPTI_ERROR_NOT_SUPPORTED = 27
+
The attempted operation is not supported on the current system or device.
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED = 28
+
Unified memory profiling is not supported on the system. Potential reason could be unsupported OS or architecture.
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE = 29
+
Unified memory profiling is not supported on the device
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES = 30
+
Unified memory profiling is not supported on a multi-GPU configuration without P2P support between any pair of devices
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS = 31
+
Unified memory profiling is not supported under the Multi-Process Service (MPS) environment. CUDA 7.5 removes this restriction. + +
+
CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED = 32
+
In CUDA 9.0, devices with compute capability 7.0 don't support CDP tracing
+
CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 33
+
Profiling on virtualized GPU is not supported.
+
CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE = 34
+
Profiling results might be incorrect for CUDA applications compiled with nvcc version older than 9.0 for devices with compute + capability 6.0 and 6.1. Profiling session will continue and CUPTI will notify it using this error code. User is advised to + recompile the application code with nvcc version 9.0 or later. Ignore this warning if code is already compiled with the recommended + nvcc version. +
+
CUPTI_ERROR_INSUFFICIENT_PRIVILEGES = 35
+
User doesn't have sufficient privileges which are required to start the profiling session. One possible reason for this may + be that the NVIDIA driver or your system administrator may have restricted access to the NVIDIA GPU performance counters. + To learn how to resolve this issue and find more information, please visit https://developer.nvidia.com/CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
+
CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED = 36
+
Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and metric API from the header cupti_metrics.h are + not compatible with the Profiling API in the header cupti_profiler_target.h and Perfworks metrics API in the headers nvperf_host.h + and nvperf_target.h. +
+
CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE = 37
+
Missing definition of the OpenACC API routine in the linked OpenACC library. One possible reason is that the OpenACC library is + linked statically in the user application, which might not have the definition of all the OpenACC API routines needed for + the OpenACC profiling, as the compiler might ignore definitions for the functions not used in the application. This issue can + be mitigated by linking the OpenACC library dynamically. +
+
CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED = 38
+
Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and metric API from the header cupti_metrics.h are + not supported on devices with compute capability 7.5 and higher (i.e. Turing and later GPU architectures). These API will + be deprecated in a future CUDA release. These are replaced by Profiling API in the header cupti_profiler_target.h and Perfworks + metrics API in the headers nvperf_host.h and nvperf_target.h. +
+
CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 39
+
CUPTI doesn't allow multiple callback subscribers. Only a single subscriber can be registered at a time. Same error code is + used when application is launched using NVIDIA tools like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb + and cuda-memcheck. +
+
CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES = 40
+
Profiling on virtualized GPU is not allowed by hypervisor.
+
CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED = 41
+
Profiling and tracing are not allowed when confidential computing mode is enabled.
+
CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED = 42
+
CUPTI does not support NVIDIA Crypto Mining Processors (CMP). For more information, please visit https://developer.nvidia.com/ERR_NVCMPGPU
+
CUPTI_ERROR_UNKNOWN = 999
+
An unknown internal error has occurred.
+
CUPTI_ERROR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiGetResultString ( CUptiResult result, const char** str )
+
+
Get the descriptive string for a CUptiResult.
+
+
+ Parameters + +
+
+
result
+
The result to get the string for
+
str
+
Returns the string
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if str is NULL or result is not a valid CUptiResult +

    +
  • +
+

+
+
+
Description
+

Return the descriptive string for a CUptiResult in *str. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+
+
+
+

5.2. CUPTI Version

+
+

Function and macro to determine the CUPTI version.

+
+

Defines

+
+
#define CUPTI_API_VERSION 18
+
The API version for this implementation of CUPTI.
+
+

Functions

+
+
CUptiResult cuptiGetVersion ( uint32_t* version )
+
Get the CUPTI API version.
+
+
+

Defines

+
+
+ #define CUPTI_API_VERSION 18
+
+
+

The API version for this implementation of CUPTI. This define along with cuptiGetVersion can be used to dynamically detect if the version of CUPTI compiled against matches the version of the loaded CUPTI library. +

+

v1 : CUDAToolsSDK 4.0 v2 : CUDAToolsSDK 4.1 v3 : CUDA Toolkit 5.0 v4 : CUDA Toolkit 5.5 v5 : CUDA Toolkit 6.0 v6 : CUDA Toolkit + 6.5 v7 : CUDA Toolkit 6.5(with sm_52 support) v8 : CUDA Toolkit 7.0 v9 : CUDA Toolkit 8.0 v10 : CUDA Toolkit 9.0 v11 : CUDA + Toolkit 9.1 v12 : CUDA Toolkit 10.0, 10.1 and 10.2 v13 : CUDA Toolkit 11.0 v14 : CUDA Toolkit 11.1 v15 : CUDA Toolkit 11.2, + 11.3 and 11.4 v16 : CUDA Toolkit 11.5 v17 : CUDA Toolkit 11.6 v18 : CUDA Toolkit 11.8 +

+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiGetVersion ( uint32_t* version )
+
+
Get the CUPTI API version.
+
+
+ Parameters + +
+
+
version
+
Returns the version
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if version is NULL +

    +
  • +
+

+
+
+
Description
+

Return the API version in *version. +

+

+

See also:

+

CUPTI_API_VERSION

+

+
+
+
+
+
+
+

5.3. CUPTI Activity API

+
+

Functions, types, and enums that implement the CUPTI Activity API.

+
+

Classes

+
+
struct  +
+
The base activity record.
+
struct  +
+
The activity record for a driver or runtime API invocation.
+
struct  +
+
Device auto boost state structure.
+
struct  +
+
The activity record for source level result branch. (deprecated).
+
struct  +
+
The activity record for source level result branch.
+
struct  +
+
The activity record for CDP (CUDA Dynamic Parallelism) kernel.
+
struct  +
+
The activity record for a context.
+
struct  +
+
The activity record for CUDA event.
+
struct  +
+
The activity record for a device. (deprecated).
+
struct  +
+
The activity record for a device. (deprecated).
+
struct  +
+
The activity record for a device. (CUDA 7.0 onwards).
+
struct  +
+
The activity record for a device. (CUDA 11.6 onwards).
+
struct  +
+
The activity record for a device attribute.
+
struct  +
+
The activity record for CUPTI environmental data.
+
struct  +
+
The activity record for a CUPTI event.
+
struct  +
+
The activity record for a CUPTI event with instance information.
+
struct  +
+
The activity record for correlation with external records.
+
struct  +
+
The activity record for global/device functions.
+
struct  +
+
The activity record for source-level global access. (deprecated).
+
struct  +
+
The activity record for source-level global access. (deprecated in CUDA 9.0).
+
struct  +
+
The activity record for source-level global access.
+
struct  +
+
The activity record for trace of graph execution.
+
struct  +
+
The activity record for an instantaneous CUPTI event.
+
struct  +
+
The activity record for an instantaneous CUPTI event with event domain instance information.
+
struct  +
+
The activity record for an instantaneous CUPTI metric.
+
struct  +
+
The instantaneous activity record for a CUPTI metric with instance information.
+
struct  +
+
The activity record for source-level sass/source line-by-line correlation.
+
struct  +
+
The activity record for source-level instruction execution.
+
struct  +
+
The activity record for JIT operations. This activity represents the JIT operations (compile, load, store) of a CUmodule from + the Compute Cache. Gives the exact hashed path of where the cached module is loaded from, or where the module will be stored + after Just-In-Time (JIT) compilation.
+
struct  +
+
The activity record for kernel. (deprecated).
+
struct  +
+
The activity record for kernel. (deprecated).
+
struct  +
+
The activity record for a kernel (CUDA 6.5(with sm_52 support) onwards). (deprecated in CUDA 9.0).
+
struct  +
+
The activity record for a kernel (CUDA 9.0(with sm_70 support) onwards). (deprecated in CUDA 11.0).
+
struct  +
+
The activity record for a kernel (CUDA 11.0(with sm_80 support) onwards). (deprecated in CUDA 11.2) This activity record represents + a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. + Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.
+
struct  +
+
The activity record for kernel. (deprecated in CUDA 11.6).
+
struct  +
+
The activity record for kernel. (deprecated in CUDA 11.8).
+
struct  +
+
The activity record for kernel.
+
struct  +
+
The activity record providing a marker which is an instantaneous point in time. (deprecated in CUDA 8.0).
+
struct  +
+
The activity record providing a marker which is an instantaneous point in time.
+
struct  +
+
The activity record providing detailed information for a marker.
+
struct  +
+
The activity record for memory copies. (deprecated).
+
struct  +
+
The activity record for memory copies. (deprecated in CUDA 11.1).
+
struct  +
+
The activity record for memory copies. (deprecated in CUDA 11.6).
+
struct  +
+
The activity record for memory copies.
+
struct  +
+
The activity record for peer-to-peer memory copies.
+
struct  +
+
The activity record for peer-to-peer memory copies. (deprecated in CUDA 11.1).
+
struct  +
+
The activity record for peer-to-peer memory copies. (deprecated in CUDA 11.6).
+
struct  +
+
The activity record for peer-to-peer memory copies.
+
struct  +
+
The activity record for memory.
+
struct  +
+
The activity record for memory.
+
struct  +
+
The activity record for memory.
+
struct  +
+
The activity record for memory pool.
+
struct  +
+
The activity record for memory pool.
+
struct  +
+
The activity record for memset. (deprecated).
+
struct  +
+
The activity record for memset. (deprecated in CUDA 11.1).
+
struct  +
+
The activity record for memset. (deprecated in CUDA 11.6).
+
struct  +
+
The activity record for memset.
+
struct  +
+
The activity record for a CUPTI metric.
+
struct  +
+
The activity record for a CUPTI metric with instance information.
+
struct  +
+
The activity record for a CUDA module.
+
struct  +
+
The activity record providing a name.
+
struct  +
+
NVLink information. (deprecated in CUDA 9.0).
+
struct  +
+
NVLink information. (deprecated in CUDA 10.0).
+
struct  +
+
NVLink information.
+
struct  +
+
NVLink information.
+
union  +
+
Identifiers for object kinds as specified by CUpti_ActivityObjectKind.
+
struct  +
+
The base activity record for OpenAcc records.
+
struct  +
+
The activity record for OpenACC data.
+
struct  +
+
The activity record for OpenACC launch.
+
struct  +
+
The activity record for OpenACC other.
+
struct  +
+
The base activity record for OpenMp records.
+
struct  +
+
The activity record for CUPTI and driver overheads.
+
struct  +
+
The activity record for PC sampling. (deprecated in CUDA 8.0).
+
struct  +
+
The activity record for PC sampling. (deprecated in CUDA 9.0).
+
struct  +
+
The activity record for PC sampling.
+
struct  +
+
PC sampling configuration structure.
+
struct  +
+
The activity record for record status for PC sampling.
+
struct  +
+
PCI devices information required to construct topology.
+
struct  +
+
The activity record for a preemption of a CDP kernel.
+
struct  +
+
The activity record for source-level shared access.
+
struct  +
+
The activity record for source locator.
+
struct  +
+
The activity record for CUDA stream.
+
struct  +
+
The activity record for synchronization management.
+
struct  +
+
The activity record for Unified Memory counters (deprecated in CUDA 7.0).
+
struct  +
+
The activity record for Unified Memory counters (CUDA 7.0 and beyond).
+
struct  +
+
Unified Memory counters configuration structure.
+
+

Defines

+
+
#define CUPTI_AUTO_BOOST_INVALID_CLIENT_PID 0
+
+
#define CUPTI_CORRELATION_ID_UNKNOWN 0
+
+
#define CUPTI_FUNCTION_INDEX_ID_INVALID 0
+
+
#define CUPTI_GRID_ID_UNKNOWN 0LL
+
+
#define CUPTI_MAX_NVLINK_PORTS 32
+
+
#define CUPTI_NVLINK_INVALID_PORT -1
+
+
#define CUPTI_SOURCE_LOCATOR_ID_UNKNOWN 0
+
+
#define CUPTI_SYNCHRONIZATION_INVALID_VALUE -1
+
+
#define CUPTI_TIMESTAMP_UNKNOWN 0LL
+
+
+

Typedefs

+
+
typedef + void +  ( *CUpti_BuffersCallbackCompleteFunc )( CUcontext context,  uint32_t streamId, uint8_t* +  buffer,  size_t size,  size_t validSize )
+
Function type for callback used by CUPTI to return a buffer of activity records.
+
typedef + void +  ( *CUpti_BuffersCallbackRequestFunc )( uint8_t* +  *buffer, size_t* +  size, size_t* +  maxNumRecords )
+
Function type for callback used by CUPTI to request an empty buffer for storing activity records.
+
typedef + uint64_t +  ( *CUpti_TimestampCallbackFunc )( )
+
Function type for callback used by CUPTI to request a timestamp to be used in activity records.
+
+

Enumerations

+
+
enum CUpti_ActivityAttribute
+
Activity attributes.
+
enum CUpti_ActivityComputeApiKind
+
The kind of a compute API.
+
enum CUpti_ActivityEnvironmentKind
+
The kind of environment data. Used to indicate what type of data is being reported by an environment activity record.
+
enum CUpti_ActivityFlag
+
Flags associated with activity records.
+
enum CUpti_ActivityInstructionClass
+
SASS instruction classification.
+
enum CUpti_ActivityJitEntryType
+
The types of JIT entry.
+
enum CUpti_ActivityJitOperationType
+
The types of JIT compilation operations.
+
enum CUpti_ActivityKind
+
The kinds of activity records.
+
enum CUpti_ActivityLaunchType
+
The type of the CUDA kernel launch.
+
enum CUpti_ActivityMemcpyKind
+
The kind of a memory copy, indicating the source and destination targets of the copy.
+
enum CUpti_ActivityMemoryKind
+
The kinds of memory accessed by a memory operation/copy.
+
enum CUpti_ActivityMemoryOperationType
+
Memory operation types.
+
enum CUpti_ActivityMemoryPoolOperationType
+
Memory pool operation types.
+
enum CUpti_ActivityMemoryPoolType
+
Memory pool types.
+
enum CUpti_ActivityObjectKind
+
The kinds of activity objects.
+
enum CUpti_ActivityOverheadKind
+
The kinds of activity overhead.
+
enum CUpti_ActivityPCSamplingPeriod
+
Sampling period for PC sampling method.
+
enum CUpti_ActivityPCSamplingStallReason
+
The stall reason for PC sampling activity.
+
enum CUpti_ActivityPartitionedGlobalCacheConfig
+
Partitioned global caching option.
+
enum CUpti_ActivityPreemptionKind
+
The kind of a preemption activity.
+
enum CUpti_ActivityStreamFlag
+
stream type.
+
enum CUpti_ActivitySynchronizationType
+
Synchronization type.
+
enum CUpti_ActivityThreadIdType
+
Thread-Id types.
+
enum CUpti_ActivityUnifiedMemoryAccessType
+
Memory access type for unified memory page faults.
+
enum CUpti_ActivityUnifiedMemoryCounterKind
+
Kind of the Unified Memory counter.
+
enum CUpti_ActivityUnifiedMemoryCounterScope
+
Scope of the unified memory counter (deprecated in CUDA 7.0).
+
enum CUpti_ActivityUnifiedMemoryMigrationCause
+
Migration cause of the Unified Memory counter.
+
enum CUpti_ActivityUnifiedMemoryRemoteMapCause
+
Remote memory map cause of the Unified Memory counter.
+
enum CUpti_DevType
+
The device type for device connected to NVLink.
+
enum CUpti_DeviceVirtualizationMode
+
+
enum CUpti_EnvironmentClocksThrottleReason
+
Reasons for clock throttling.
+
enum CUpti_ExternalCorrelationKind
+
The kind of external APIs supported for correlation.
+
enum CUpti_FuncShmemLimitConfig
+
The shared memory limit per block config for a kernel. This should be used to set the 'cudaOccFuncShmemConfig' field in the occupancy + calculator API.
+
enum CUpti_LinkFlag
+
Link flags.
+
enum CUpti_OpenAccConstructKind
+
The OpenAcc parent construct kind for OpenAcc activity records.
+
enum CUpti_OpenAccEventKind
+
The OpenAcc event kind for OpenAcc activity records.
+
enum CUpti_PcieDeviceType
+
+
enum CUpti_PcieGen
+
PCIE Generation.
+
+

Functions

+
+
CUptiResult cuptiActivityConfigurePCSampling ( CUcontext ctx, CUpti_ActivityPCSamplingConfig* config )
+
Set PC sampling configuration.
+
CUptiResult cuptiActivityConfigureUnifiedMemoryCounter ( CUpti_ActivityUnifiedMemoryCounterConfig* config, uint32_t count )
+
Set Unified Memory Counter configuration.
+
CUptiResult cuptiActivityDisable ( CUpti_ActivityKind kind )
+
Disable collection of a specific kind of activity record.
+
CUptiResult cuptiActivityDisableContext ( CUcontext context, CUpti_ActivityKind kind )
+
Disable collection of a specific kind of activity record for a context.
+
CUptiResult cuptiActivityEnable ( CUpti_ActivityKind kind )
+
Enable collection of a specific kind of activity record.
+
CUptiResult cuptiActivityEnableAndDump ( CUpti_ActivityKind kind )
+
Enable collection of a specific kind of activity record. For certain activity kinds it dumps existing records.
+
CUptiResult cuptiActivityEnableContext ( CUcontext context, CUpti_ActivityKind kind )
+
Enable collection of a specific kind of activity record for a context.
+
CUptiResult cuptiActivityEnableLatencyTimestamps ( uint8_t enable )
+
Controls the collection of queued and submitted timestamps for kernels.
+
CUptiResult cuptiActivityEnableLaunchAttributes ( uint8_t enable )
+
Controls the collection of launch attributes for kernels.
+
CUptiResult cuptiActivityFlush ( CUcontext context, uint32_t streamId, uint32_t flag )
+
Wait for all activity records to be delivered via the completion callback.
+
CUptiResult cuptiActivityFlushAll ( uint32_t flag )
+
Request to deliver activity records via the buffer completion callback.
+
CUptiResult cuptiActivityFlushPeriod ( uint32_t time )
+
Sets the flush period for the worker thread.
+
CUptiResult cuptiActivityGetAttribute ( CUpti_ActivityAttribute attr, size_t* valueSize, void* value )
+
Read an activity API attribute.
+
CUptiResult cuptiActivityGetNextRecord ( uint8_t* buffer, size_t validBufferSizeBytes, CUpti_Activity** record )
+
Iterate over the activity records in a buffer.
+
CUptiResult cuptiActivityGetNumDroppedRecords ( CUcontext context, uint32_t streamId, size_t* dropped )
+
Get the number of activity records that were dropped because of insufficient buffer space.
+
CUptiResult cuptiActivityPopExternalCorrelationId ( CUpti_ExternalCorrelationKind kind, uint64_t* lastId )
+
Pop an external correlation id for the calling thread.
+
CUptiResult cuptiActivityPushExternalCorrelationId ( CUpti_ExternalCorrelationKind kind, uint64_t id )
+
Push an external correlation id for the calling thread.
+
CUptiResult cuptiActivityRegisterCallbacks ( CUpti_BuffersCallbackRequestFunc funcBufferRequested, CUpti_BuffersCallbackCompleteFunc funcBufferCompleted )
+
Registers callback functions with CUPTI for activity buffer handling.
+
CUptiResult cuptiActivityRegisterTimestampCallback ( CUpti_TimestampCallbackFunc funcTimestamp )
+
Registers callback function with CUPTI for providing timestamp.
+
CUptiResult cuptiActivitySetAttribute ( CUpti_ActivityAttribute attr, size_t* valueSize, void* value )
+
Write an activity API attribute.
+
CUptiResult cuptiComputeCapabilitySupported ( int  major, int  minor, int* support )
+
Check support for a compute capability.
+
CUptiResult cuptiDeviceSupported ( CUdevice dev, int* support )
+
Check support for a compute device.
+
CUptiResult cuptiDeviceVirtualizationMode ( CUdevice dev, CUpti_DeviceVirtualizationMode* mode )
+
Query the virtualization mode of the device.
+
CUptiResult cuptiFinalize ( void )
+
Detach CUPTI from the running process.
+
CUptiResult cuptiGetAutoBoostState ( CUcontext context, CUpti_ActivityAutoBoostState* state )
+
Get auto boost state.
+
CUptiResult cuptiGetContextId ( CUcontext context, uint32_t* contextId )
+
Get the ID of a context.
+
CUptiResult cuptiGetDeviceId ( CUcontext context, uint32_t* deviceId )
+
Get the ID of a device.
+
CUptiResult cuptiGetGraphId ( CUgraph graph, uint32_t* pId )
+
Get the unique ID of a graph.
+
CUptiResult cuptiGetGraphNodeId ( CUgraphNode node, uint64_t* nodeId )
+
Get the unique ID of a graph node.
+
CUptiResult cuptiGetLastError ( void )
+
Returns the last error from a cupti call or callback.
+
CUptiResult cuptiGetStreamId ( CUcontext context, CUstream stream, uint32_t* streamId )
+
Get the ID of a stream.
+
CUptiResult cuptiGetStreamIdEx ( CUcontext context, CUstream stream, uint8_t perThreadStream, uint32_t* streamId )
+
Get the ID of a stream.
+
CUptiResult cuptiGetThreadIdType ( CUpti_ActivityThreadIdType* type )
+
Get the thread-id type.
+
CUptiResult cuptiGetTimestamp ( uint64_t* timestamp )
+
Get the CUPTI timestamp.
+
CUptiResult cuptiSetThreadIdType ( CUpti_ActivityThreadIdType type )
+
Set the thread-id type.
+
+
+

Defines

+
+
+ #define CUPTI_AUTO_BOOST_INVALID_CLIENT_PID 0
+
+
+

An invalid/unknown process id.

+
+
+
+ #define CUPTI_CORRELATION_ID_UNKNOWN 0
+
+
+

An invalid/unknown correlation ID. A correlation ID of this value indicates that there is no correlation for the activity + record. +

+
+
+
+ #define CUPTI_FUNCTION_INDEX_ID_INVALID 0
+
+
+

An invalid function index ID.

+
+
+
+ #define CUPTI_GRID_ID_UNKNOWN 0LL
+
+
+

An invalid/unknown grid ID.

+
+
+
+ #define CUPTI_MAX_NVLINK_PORTS 32
+
+
+

Maximum NVLink port numbers.

+
+
+
+ #define CUPTI_NVLINK_INVALID_PORT -1
+
+
+

Invalid/unknown NVLink port number.

+
+
+
+ #define CUPTI_SOURCE_LOCATOR_ID_UNKNOWN 0
+
+
+

The source-locator ID that indicates an unknown source location. There is not an actual CUpti_ActivitySourceLocator object corresponding to this value. +

+
+
+
+ #define CUPTI_SYNCHRONIZATION_INVALID_VALUE -1
+
+
+

An invalid/unknown value.

+
+
+
+ #define CUPTI_TIMESTAMP_UNKNOWN 0LL
+
+
+

An invalid/unknown timestamp for a start, end, queued, submitted, or completed time.

+
+
+
+
+
+

Typedefs

+
+
+ + void + ( *CUpti_BuffersCallbackCompleteFunc )( CUcontext context,  uint32_t streamId, uint8_t* +  buffer,  size_t size,  size_t validSize )
+
+
+

Function type for callback used by CUPTI to return a buffer of activity records. This callback function returns to the CUPTI + client a buffer containing activity records. The buffer contains validSize bytes of activity records which should be read using cuptiActivityGetNextRecord. The number of dropped records can be read + using cuptiActivityGetNumDroppedRecords. After this call CUPTI relinquishes ownership of the buffer and will not use it anymore. + The client may return the buffer to CUPTI using the CUpti_BuffersCallbackRequestFunc callback. Note: From CUDA 6.0 onwards, all + buffers returned by this callback are global buffers i.e. there is no context/stream specific buffer. The user needs to parse + the global buffer to extract the context/stream specific activity records. +

+

+
+
+
+
+ Parameters + +
+
+
context
+
The context this buffer is associated with. If NULL, the buffer is associated with the global activities. This field is deprecated + as of CUDA 6.0 and will always be NULL. +
+
uint32_t streamId
+
+
buffer
+
The activity record buffer.
+
size_t size
+
+
size_t validSize
+
+
+
+
+ + void + ( *CUpti_BuffersCallbackRequestFunc )( uint8_t* +  *buffer, size_t* +  size, size_t* +  maxNumRecords )
+
+
+

Function type for callback used by CUPTI to request an empty buffer for storing activity records. This callback function + signals the CUPTI client that an activity buffer is needed by CUPTI. The activity buffer is used by CUPTI to store activity + records. The callback function can decline the request by setting *buffer to NULL. In this case CUPTI may drop activity records. +

+

+
+
+
+
+ Parameters + +
+
+
*buffer
+
+
size
+
Returns the size of the returned buffer.
+
maxNumRecords
+
Returns the maximum number of records that should be placed in the buffer. If 0 then the buffer is filled with as many records + as possible. If > 0 the buffer is filled with at most that many records before it is returned. +
+
+
+
+ + uint64_t + ( *CUpti_TimestampCallbackFunc )( )
+
+
+

Function type for callback used by CUPTI to request a timestamp to be used in activity records. This callback function signals + the CUPTI client that a timestamp needs to be returned. This timestamp would be treated as normalized timestamp to be used + for various purposes in CUPTI. For example to store start and end timestamps reported in the CUPTI activity records. The returned + timestamp must be in nanoseconds. +

+

See also:

+

cuptiActivityRegisterTimestampCallback

+

+
+
+
+
+
+

Enumerations

+
+
+ enum CUpti_ActivityAttribute
+
+
+

These attributes are used to control the behavior of the activity API.

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE = 0
+
The device memory size (in bytes) reserved for storing profiling data for concurrent kernels (activity kind CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL), memcopies and memsets for each buffer on a context. The value is a size_t.There is a limit on how many device buffers can + be allocated per context. User can query and set this limit using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT. CUPTI doesn't pre-allocate all the buffers, it pre-allocates only those many buffers as set by the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE. When all of the data in a buffer is consumed, it is added in the reuse pool, and CUPTI picks a buffer from this pool when + a new buffer is needed. Thus memory footprint does not scale with the kernel count. Applications with the high density of + kernels, memcopies and memsets might result in having CUPTI to allocate more device buffers. CUPTI allocates another buffer + only when it runs out of the buffers in the reuse pool.Since buffer allocation happens in the main application thread, this + might result in stalls in the critical path. CUPTI pre-allocates 3 buffers of the same size to mitigate this issue. User can + query and set the pre-allocation limit using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE.Having larger buffer size leaves less device memory for the application. Having smaller buffer size increases the risk of + dropping timestamps for records if too many kernels or memcopies or memsets are launched at one time.This value only applies + to new buffer allocations. 
Set this value before initializing CUDA or before creating a context to ensure it is considered + for the following allocations. The default value is 3200000 (~3MB) which can accommodate profiling data for up to 100,000 kernels, + memcopies and memsets combined. Note: Starting with the CUDA 11.2 release, CUPTI allocates profiling buffer in the pinned host + memory by default as this might help in improving the performance of the tracing run. Refer to the description of the attribute + CUPTI_ACTIVITY_ATTR_MEM_ALLOCATION_TYPE_HOST_PINNED for more details. Size of the memory and maximum number of pools are still controlled by the attributes CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE and CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT. Note: The actual amount of device memory per buffer reserved by CUPTI might be larger. +
+
CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP = 1
+
The device memory size (in bytes) reserved for storing profiling data for CDP operations for each buffer on a context. The + value is a size_t. Having larger buffer size means fewer flush operations but consumes more device memory. This value only applies + to new allocations. Set this value before initializing CUDA or before creating a context to ensure it is considered for the + following allocations. The default value is 8388608 (8MB). Note: The actual amount of device memory per context reserved by + CUPTI might be larger. +
+
CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT = 2
+
The maximum number of device memory buffers per context. The value is a size_t. For an application with high rate of kernel + launches, memcopies and memsets having a bigger pool limit helps in timestamp collection for all these activities at the expense + of a larger memory footprint. Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE for more details. Setting this value will not modify the number of memory buffers currently stored. Set this value before initializing + CUDA to ensure the limit is not exceeded. The default value is 250. +
+
CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE = 3
+
The profiling semaphore pool size reserved for storing profiling data for serialized kernels tracing (activity kind CUPTI_ACTIVITY_KIND_KERNEL) for each context. The value is a size_t. There is a limit on how many semaphore pools can be allocated per context. User + can query and set this limit using the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT. CUPTI doesn't pre-allocate all the semaphore pools, it pre-allocates only those many semaphore pools as set by the attribute + CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE. When all of the data in a semaphore pool is consumed, it is added in the reuse pool, and CUPTI picks a semaphore pool from + the reuse pool when a new semaphore pool is needed. Thus memory footprint does not scale with the kernel count. Applications + with the high density of kernels might result in having CUPTI to allocate more semaphore pools. CUPTI allocates another semaphore + pool only when it runs out of the semaphore pools in the reuse pool. Since semaphore pool allocation happens in the main application + thread, this might result in stalls in the critical path. CUPTI pre-allocates 3 semaphore pools of the same size to mitigate + this issue. User can query and set the pre-allocation limit using the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE. Having larger semaphore pool size leaves less device memory for the application. Having smaller semaphore pool size increases + the risk of dropping timestamps for kernel records if too many kernels are issued/launched at one time. This value only applies + to new semaphore pool allocations. Set this value before initializing CUDA or before creating a context to ensure it is considered + for the following allocations. The default value is 25000 which can accommodate profiling data for up to 25,000 kernels. +
+
CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT = 4
+
The maximum number of profiling semaphore pools per context. The value is a size_t. For an application with high rate of kernel + launches, having a bigger pool limit helps in timestamp collection for all the kernels, at the expense of a larger device + memory footprint. Refer to the description of the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE for more details. Set this value before initializing CUDA to ensure the limit is not exceeded. The default value is 250. +
+
CUPTI_ACTIVITY_ATTR_ZEROED_OUT_ACTIVITY_BUFFER = 5
+
The flag to indicate whether user should provide activity buffer of zero value. The value is a uint8_t. If the value of this + attribute is non-zero, user should provide a zero value buffer in the CUpti_BuffersCallbackRequestFunc. If the user does not provide a zero value buffer after setting this to non-zero, the activity buffer may contain some uninitialized + values when CUPTI returns it in CUpti_BuffersCallbackCompleteFunc. If the value of this attribute is zero, CUPTI will initialize the user buffer received in the CUpti_BuffersCallbackRequestFunc to zero before filling it. If the user sets this to zero, a few stalls may appear in critical path because CUPTI will zero + out the buffer in the main thread. Set this value before returning from CUpti_BuffersCallbackRequestFunc to ensure it is considered for all the subsequent user buffers. The default value is 0. +
+
CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE = 6
+
Number of device buffers to pre-allocate for a context during the initialization phase. The value is a size_t. Refer to the + description of the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE for details. This value must be less than the maximum number of device buffers set using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT. Set this value before initializing CUDA or before creating a context to ensure it is considered by the CUPTI. The default value + is set to 3 to ping pong between these buffers (if possible). +
+
CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE = 7
+
Number of profiling semaphore pools to pre-allocate for a context during the initialization phase. The value is a size_t. Refer + to the description of the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE for details. This value must be less than the maximum number of profiling semaphore pools set using the attribute CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT. Set this value before initializing CUDA or before creating a context to ensure it is considered by the CUPTI. The default value + is set to 3 to ping pong between these pools (if possible). +
+
CUPTI_ACTIVITY_ATTR_MEM_ALLOCATION_TYPE_HOST_PINNED = 8
+
Allocate page-locked (pinned) host memory for storing profiling data for concurrent kernels, memcopies and memsets for each + buffer on a context. The value is a uint8_t. Starting with the CUDA 11.2 release, CUPTI allocates profiling buffer in the pinned + host memory by default as this might help in improving the performance of the tracing run. Allocating excessive amounts of + pinned memory may degrade system performance, since it reduces the amount of memory available to the system for paging. For + this reason user might want to change the location from pinned host memory to device memory by setting value of this attribute + to 0. The default value is 1. +
+
CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityComputeApiKind
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_COMPUTE_API_UNKNOWN = 0
+
The compute API is not known.
+
CUPTI_ACTIVITY_COMPUTE_API_CUDA = 1
+
The compute APIs are for CUDA.
+
CUPTI_ACTIVITY_COMPUTE_API_CUDA_MPS = 2
+
The compute APIs are for CUDA running in MPS (Multi-Process Service) environment.
+
CUPTI_ACTIVITY_COMPUTE_API_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityEnvironmentKind
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_ENVIRONMENT_UNKNOWN = 0
+
Unknown data.
+
CUPTI_ACTIVITY_ENVIRONMENT_SPEED = 1
+
The environment data is related to speed.
+
CUPTI_ACTIVITY_ENVIRONMENT_TEMPERATURE = 2
+
The environment data is related to temperature.
+
CUPTI_ACTIVITY_ENVIRONMENT_POWER = 3
+
The environment data is related to power.
+
CUPTI_ACTIVITY_ENVIRONMENT_COOLING = 4
+
The environment data is related to cooling.
+
CUPTI_ACTIVITY_ENVIRONMENT_COUNT
+
+
CUPTI_ACTIVITY_ENVIRONMENT_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityFlag
+
+
+

Activity record flags. Flags can be combined by bitwise OR to associate multiple flags with an activity record. Each flag + is specific to a certain activity kind, as noted below. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_FLAG_NONE = 0
+
Indicates the activity record has no flags.
+
CUPTI_ACTIVITY_FLAG_DEVICE_CONCURRENT_KERNELS = 1<<0
+
Indicates the activity represents a device that supports concurrent kernel execution. Valid for CUPTI_ACTIVITY_KIND_DEVICE. + +
+
CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE = 1<<0
+
Indicates if the activity represents a CUdevice_attribute value or a CUpti_DeviceAttribute value. Valid for CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE. + +
+
CUPTI_ACTIVITY_FLAG_MEMCPY_ASYNC = 1<<0
+
Indicates the activity represents an asynchronous memcpy operation. Valid for CUPTI_ACTIVITY_KIND_MEMCPY.
+
CUPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS = 1<<0
+
Indicates the activity represents an instantaneous marker. Valid for CUPTI_ACTIVITY_KIND_MARKER.
+
CUPTI_ACTIVITY_FLAG_MARKER_START = 1<<1
+
Indicates the activity represents a region start marker. Valid for CUPTI_ACTIVITY_KIND_MARKER.
+
CUPTI_ACTIVITY_FLAG_MARKER_END = 1<<2
+
Indicates the activity represents a region end marker. Valid for CUPTI_ACTIVITY_KIND_MARKER.
+
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE = 1<<3
+
Indicates the activity represents an attempt to acquire a user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER. + +
+
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_SUCCESS = 1<<4
+
Indicates the activity represents success in acquiring the user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER. + +
+
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_FAILED = 1<<5
+
Indicates the activity represents failure in acquiring the user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER. + +
+
CUPTI_ACTIVITY_FLAG_MARKER_SYNC_RELEASE = 1<<6
+
Indicates the activity represents releasing a reservation on user defined synchronization object. Valid for CUPTI_ACTIVITY_KIND_MARKER. + +
+
CUPTI_ACTIVITY_FLAG_MARKER_COLOR_NONE = 1<<0
+
Indicates the activity represents a marker that does not specify a color. Valid for CUPTI_ACTIVITY_KIND_MARKER_DATA.
+
CUPTI_ACTIVITY_FLAG_MARKER_COLOR_ARGB = 1<<1
+
Indicates the activity represents a marker that specifies a color in alpha-red-green-blue format. Valid for CUPTI_ACTIVITY_KIND_MARKER_DATA. + +
+
CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_SIZE_MASK = 0xFF<<0
+
The number of bytes requested by each thread. Valid for CUpti_ActivityGlobalAccess3. +
+
CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_LOAD = 1<<8
+
If this bit in the flag is set, the access was a load; otherwise it was a store. Valid for CUpti_ActivityGlobalAccess3. +
+
CUPTI_ACTIVITY_FLAG_GLOBAL_ACCESS_KIND_CACHED = 1<<9
+
If this bit in the flag is set, the load access was cached; otherwise it was uncached. Valid for CUpti_ActivityGlobalAccess3. +
+
CUPTI_ACTIVITY_FLAG_METRIC_OVERFLOWED = 1<<0
+
If this bit in flag is set, the metric value overflowed. Valid for CUpti_ActivityMetric and CUpti_ActivityMetricInstance. +
+
CUPTI_ACTIVITY_FLAG_METRIC_VALUE_INVALID = 1<<1
+
If this bit in flag is set, the metric value couldn't be calculated. This occurs when a value(s) required to calculate the + metric is missing. Valid for CUpti_ActivityMetric and CUpti_ActivityMetricInstance. +
+
CUPTI_ACTIVITY_FLAG_INSTRUCTION_VALUE_INVALID = 1<<0
+
If this bit in flag is set, the source level metric value couldn't be calculated. This occurs when a value(s) required to + calculate the source level metric cannot be evaluated. Valid for CUpti_ActivityInstructionExecution. +
+
CUPTI_ACTIVITY_FLAG_INSTRUCTION_CLASS_MASK = 0xFF<<1
+
The mask for the instruction class, CUpti_ActivityInstructionClass. Valid for CUpti_ActivityInstructionExecution and CUpti_ActivityInstructionCorrelation.
+
CUPTI_ACTIVITY_FLAG_FLUSH_FORCED = 1<<0
+
When calling cuptiActivityFlushAll, this flag can be set to force CUPTI to flush all records in the buffer, whether finished + or not. +
+
CUPTI_ACTIVITY_FLAG_SHARED_ACCESS_KIND_SIZE_MASK = 0xFF<<0
+
The number of bytes requested by each thread. Valid for CUpti_ActivitySharedAccess. +
+
CUPTI_ACTIVITY_FLAG_SHARED_ACCESS_KIND_LOAD = 1<<8
+
If this bit in the flag is set, the access was a load; otherwise it was a store. Valid for CUpti_ActivitySharedAccess. +
+
CUPTI_ACTIVITY_FLAG_MEMSET_ASYNC = 1<<0
+
Indicates the activity represents an asynchronous memset operation. Valid for CUPTI_ACTIVITY_KIND_MEMSET.
+
CUPTI_ACTIVITY_FLAG_THRASHING_IN_CPU = 1<<0
+
Indicates the activity represents thrashing in CPU. Valid for counter of kind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING + in CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER +
+
CUPTI_ACTIVITY_FLAG_THROTTLING_IN_CPU = 1<<0
+
Indicates the activity represents page throttling in CPU. Valid for counter of kind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING + in CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER +
+
CUPTI_ACTIVITY_FLAG_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityInstructionClass
+
+
+

SASS instructions are broadly divided into different classes. Each enum value represents a classification.

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_UNKNOWN = 0
+
The instruction class is not known.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_32 = 1
+
Represents a 32 bit floating point operation.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_64 = 2
+
Represents a 64 bit floating point operation.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_INTEGER = 3
+
Represents an integer operation.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_BIT_CONVERSION = 4
+
Represents a bit conversion operation.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_CONTROL_FLOW = 5
+
Represents a control flow instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_GLOBAL = 6
+
Represents a global load-store instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SHARED = 7
+
Represents a shared load-store instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_LOCAL = 8
+
Represents a local load-store instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_GENERIC = 9
+
Represents a generic load-store instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SURFACE = 10
+
Represents a surface load-store instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_CONSTANT = 11
+
Represents a constant load instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_TEXTURE = 12
+
Represents a texture load-store instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_GLOBAL_ATOMIC = 13
+
Represents a global atomic instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SHARED_ATOMIC = 14
+
Represents a shared atomic instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_SURFACE_ATOMIC = 15
+
Represents a surface atomic instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_INTER_THREAD_COMMUNICATION = 16
+
Represents an inter-thread communication instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_BARRIER = 17
+
Represents a barrier instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_MISCELLANEOUS = 18
+
Represents some miscellaneous instructions which do not fit in the above classification.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_FP_16 = 19
+
Represents a 16 bit floating point operation.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_UNIFORM = 20
+
Represents a uniform instruction.
+
CUPTI_ACTIVITY_INSTRUCTION_CLASS_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityJitEntryType
+
+
+

To be used in CUpti_ActivityJit. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_JIT_ENTRY_INVALID = 0
+
+
CUPTI_ACTIVITY_JIT_ENTRY_PTX_TO_CUBIN = 1
+
PTX to CUBIN.
+
CUPTI_ACTIVITY_JIT_ENTRY_NVVM_IR_TO_PTX = 2
+
NVVM-IR to PTX
+
CUPTI_ACTIVITY_JIT_ENTRY_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityJitOperationType
+
+
+

To be used in CUpti_ActivityJit. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_JIT_OPERATION_INVALID = 0
+
+
CUPTI_ACTIVITY_JIT_OPERATION_CACHE_LOAD = 1
+
Loaded from the compute cache.
+
CUPTI_ACTIVITY_JIT_OPERATION_CACHE_STORE = 2
+
Stored in the compute cache.
+
CUPTI_ACTIVITY_JIT_OPERATION_COMPILE = 3
+
JIT compilation.
+
CUPTI_ACTIVITY_JIT_OPERATION_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityKind
+
+
+

Each activity record kind represents information about a GPU or an activity occurring on a CPU or GPU. Each kind is associated + with an activity record structure that holds the information associated with the kind. +

+

See also:

+

CUpti_Activity

+

CUpti_ActivityAPI

+

CUpti_ActivityContext

+

CUpti_ActivityDevice

+

CUpti_ActivityDevice2

+

CUpti_ActivityDevice3

+

CUpti_ActivityDevice4

+

CUpti_ActivityDeviceAttribute

+

CUpti_ActivityEvent

+

CUpti_ActivityEventInstance

+

CUpti_ActivityKernel

+

CUpti_ActivityKernel2

+

CUpti_ActivityKernel3

+

CUpti_ActivityKernel4

+

CUpti_ActivityKernel5

+

CUpti_ActivityKernel6

+

CUpti_ActivityKernel7

+

CUpti_ActivityKernel8

+

CUpti_ActivityCdpKernel

+

CUpti_ActivityPreemption

+

CUpti_ActivityMemcpy

+

CUpti_ActivityMemcpy3

+

CUpti_ActivityMemcpy4

+

CUpti_ActivityMemcpy5

+

CUpti_ActivityMemcpyPtoP

+

CUpti_ActivityMemcpyPtoP2

+

CUpti_ActivityMemcpyPtoP3

+

CUpti_ActivityMemcpyPtoP4

+

CUpti_ActivityMemset

+

CUpti_ActivityMemset2

+

CUpti_ActivityMemset3

+

CUpti_ActivityMemset4

+

CUpti_ActivityMetric

+

CUpti_ActivityMetricInstance

+

CUpti_ActivityName

+

CUpti_ActivityMarker

+

CUpti_ActivityMarker2

+

CUpti_ActivityMarkerData

+

CUpti_ActivitySourceLocator

+

CUpti_ActivityGlobalAccess

+

CUpti_ActivityGlobalAccess2

+

CUpti_ActivityGlobalAccess3

+

CUpti_ActivityBranch

+

CUpti_ActivityBranch2

+

CUpti_ActivityOverhead

+

CUpti_ActivityEnvironment

+

CUpti_ActivityInstructionExecution

+

CUpti_ActivityUnifiedMemoryCounter

+

CUpti_ActivityFunction

+

CUpti_ActivityModule

+

CUpti_ActivitySharedAccess

+

CUpti_ActivityPCSampling

+

CUpti_ActivityPCSampling2

+

CUpti_ActivityPCSampling3

+

CUpti_ActivityPCSamplingRecordInfo

+

CUpti_ActivityCudaEvent

+

CUpti_ActivityStream

+

CUpti_ActivitySynchronization

+

CUpti_ActivityInstructionCorrelation

+

CUpti_ActivityExternalCorrelation

+

CUpti_ActivityUnifiedMemoryCounter2

+

CUpti_ActivityOpenAccData

+

CUpti_ActivityOpenAccLaunch

+

CUpti_ActivityOpenAccOther

+

CUpti_ActivityOpenMp

+

CUpti_ActivityNvLink

+

CUpti_ActivityNvLink2

+

CUpti_ActivityNvLink3

+

CUpti_ActivityNvLink4

+

CUpti_ActivityMemory

+

CUpti_ActivityPcie

+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_KIND_INVALID = 0
+
The activity record is invalid.
+
CUPTI_ACTIVITY_KIND_MEMCPY = 1
+
A host<->host, host<->device, or device<->device memory copy. The corresponding activity record structure is CUpti_ActivityMemcpy5. +
+
CUPTI_ACTIVITY_KIND_MEMSET = 2
+
A memory set executing on the GPU. The corresponding activity record structure is CUpti_ActivityMemset4. +
+
CUPTI_ACTIVITY_KIND_KERNEL = 3
+
A kernel executing on the GPU. This activity kind may significantly change the overall performance characteristics of the + application because all kernel executions are serialized on the GPU. The other kernel activity kind, CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL, + doesn't break kernel concurrency. The corresponding activity record structure is CUpti_ActivityKernel8. +
+
CUPTI_ACTIVITY_KIND_DRIVER = 4
+
A CUDA driver API function execution. The corresponding activity record structure is CUpti_ActivityAPI. +
+
CUPTI_ACTIVITY_KIND_RUNTIME = 5
+
A CUDA runtime API function execution. The corresponding activity record structure is CUpti_ActivityAPI. +
+
CUPTI_ACTIVITY_KIND_EVENT = 6
+
An event value. The corresponding activity record structure is CUpti_ActivityEvent. +
+
CUPTI_ACTIVITY_KIND_METRIC = 7
+
A metric value. The corresponding activity record structure is CUpti_ActivityMetric. +
+
CUPTI_ACTIVITY_KIND_DEVICE = 8
+
Information about a device. The corresponding activity record structure is CUpti_ActivityDevice4. +
+
CUPTI_ACTIVITY_KIND_CONTEXT = 9
+
Information about a context. The corresponding activity record structure is CUpti_ActivityContext. +
+
CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL = 10
+
A kernel executing on the GPU. This activity kind doesn't break kernel concurrency. The corresponding activity record structure + is CUpti_ActivityKernel8. +
+
CUPTI_ACTIVITY_KIND_NAME = 11
+
Resource naming done via NVTX APIs for thread, device, context, etc. The corresponding activity record structure is CUpti_ActivityName. +
+
CUPTI_ACTIVITY_KIND_MARKER = 12
+
Instantaneous, start, or end NVTX marker. The corresponding activity record structure is CUpti_ActivityMarker2. +
+
CUPTI_ACTIVITY_KIND_MARKER_DATA = 13
+
Extended, optional, data about a marker. The corresponding activity record structure is CUpti_ActivityMarkerData. +
+
CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR = 14
+
Source information about source level result. The corresponding activity record structure is CUpti_ActivitySourceLocator. +
+
CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS = 15
+
Results for source-level global access. The corresponding activity record structure is CUpti_ActivityGlobalAccess3. +
+
CUPTI_ACTIVITY_KIND_BRANCH = 16
+
Results for source-level branch. The corresponding activity record structure is CUpti_ActivityBranch2. +
+
CUPTI_ACTIVITY_KIND_OVERHEAD = 17
+
Overhead activity records. The corresponding activity record structure is CUpti_ActivityOverhead. +
+
CUPTI_ACTIVITY_KIND_CDP_KERNEL = 18
+
A CDP (CUDA Dynamic Parallel) kernel executing on the GPU. The corresponding activity record structure is CUpti_ActivityCdpKernel. This activity cannot be directly enabled or disabled. It is enabled and disabled through the concurrent kernel activity, i.e. + CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +
+
CUPTI_ACTIVITY_KIND_PREEMPTION = 19
+
Preemption activity record indicating a preemption of a CDP (CUDA Dynamic Parallel) kernel executing on the GPU. The corresponding + activity record structure is CUpti_ActivityPreemption. +
+
CUPTI_ACTIVITY_KIND_ENVIRONMENT = 20
+
Environment activity records indicating power, clock, thermal, etc. levels of the GPU. The corresponding activity record structure + is CUpti_ActivityEnvironment. +
+
CUPTI_ACTIVITY_KIND_EVENT_INSTANCE = 21
+
An event value associated with a specific event domain instance. The corresponding activity record structure is CUpti_ActivityEventInstance. +
+
CUPTI_ACTIVITY_KIND_MEMCPY2 = 22
+
A peer to peer memory copy. The corresponding activity record structure is CUpti_ActivityMemcpyPtoP4. +
+
CUPTI_ACTIVITY_KIND_METRIC_INSTANCE = 23
+
A metric value associated with a specific metric domain instance. The corresponding activity record structure is CUpti_ActivityMetricInstance. +
+
CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION = 24
+
Results for source-level instruction execution. The corresponding activity record structure is CUpti_ActivityInstructionExecution. +
+
CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER = 25
+
Unified Memory counter record. The corresponding activity record structure is CUpti_ActivityUnifiedMemoryCounter2. +
+
CUPTI_ACTIVITY_KIND_FUNCTION = 26
+
Device global/function record. The corresponding activity record structure is CUpti_ActivityFunction. +
+
CUPTI_ACTIVITY_KIND_MODULE = 27
+
CUDA Module record. The corresponding activity record structure is CUpti_ActivityModule. +
+
CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE = 28
+
A device attribute value. The corresponding activity record structure is CUpti_ActivityDeviceAttribute. +
+
CUPTI_ACTIVITY_KIND_SHARED_ACCESS = 29
+
Results for source-level shared access. The corresponding activity record structure is CUpti_ActivitySharedAccess. +
+
CUPTI_ACTIVITY_KIND_PC_SAMPLING = 30
+
Enable PC sampling for kernels. This will serialize kernels. The corresponding activity record structure is CUpti_ActivityPCSampling3. +
+
CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO = 31
+
Summary information about PC sampling records. The corresponding activity record structure is CUpti_ActivityPCSamplingRecordInfo. +
+
CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION = 32
+
SASS/Source line-by-line correlation record. This will generate sass/source correlation for functions that have source level + analysis or pc sampling results. The records will be generated only when either of source level analysis or pc sampling activity + is enabled. The corresponding activity record structure is CUpti_ActivityInstructionCorrelation. +
+
CUPTI_ACTIVITY_KIND_OPENACC_DATA = 33
+
OpenACC data events. The corresponding activity record structure is CUpti_ActivityOpenAccData. +
+
CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH = 34
+
OpenACC launch events. The corresponding activity record structure is CUpti_ActivityOpenAccLaunch. +
+
CUPTI_ACTIVITY_KIND_OPENACC_OTHER = 35
+
OpenACC other events. The corresponding activity record structure is CUpti_ActivityOpenAccOther. +
+
CUPTI_ACTIVITY_KIND_CUDA_EVENT = 36
+
Information about a CUDA event. The corresponding activity record structure is CUpti_ActivityCudaEvent. +
+
CUPTI_ACTIVITY_KIND_STREAM = 37
+
Information about a CUDA stream. The corresponding activity record structure is CUpti_ActivityStream. +
+
CUPTI_ACTIVITY_KIND_SYNCHRONIZATION = 38
+
Records for synchronization management. The corresponding activity record structure is CUpti_ActivitySynchronization. +
+
CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION = 39
+
Records for correlation of different programming APIs. The corresponding activity record structure is CUpti_ActivityExternalCorrelation. +
+
CUPTI_ACTIVITY_KIND_NVLINK = 40
+
NVLink information. The corresponding activity record structure is CUpti_ActivityNvLink4. +
+
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT = 41
+
Instantaneous Event information. The corresponding activity record structure is CUpti_ActivityInstantaneousEvent. +
+
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE = 42
+
Instantaneous Event information for a specific event domain instance. The corresponding activity record structure is CUpti_ActivityInstantaneousEventInstance.
+
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC = 43
+
Instantaneous Metric information. The corresponding activity record structure is CUpti_ActivityInstantaneousMetric. +
+
CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE = 44
+
Instantaneous Metric information for a specific metric domain instance. The corresponding activity record structure is CUpti_ActivityInstantaneousMetricInstance. +
+
CUPTI_ACTIVITY_KIND_MEMORY = 45
+
Memory activity tracking allocation and freeing of the memory. The corresponding activity record structure is CUpti_ActivityMemory. +
+
CUPTI_ACTIVITY_KIND_PCIE = 46
+
PCI devices information used for PCI topology. The corresponding activity record structure is CUpti_ActivityPcie. +
+
CUPTI_ACTIVITY_KIND_OPENMP = 47
+
OpenMP parallel events. The corresponding activity record structure is CUpti_ActivityOpenMp. +
+
CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API = 48
+
A CUDA driver kernel launch occurring outside of any public API function execution. Tools can handle these like records for + driver API launch functions, although the cbid field is not used here. The corresponding activity record structure is CUpti_ActivityAPI. +
+
CUPTI_ACTIVITY_KIND_MEMORY2 = 49
+
Memory activity tracking allocation and freeing of the memory. The corresponding activity record structure is CUpti_ActivityMemory3. +
+
CUPTI_ACTIVITY_KIND_MEMORY_POOL = 50
+
Memory pool activity tracking creation, destruction and trimming of the memory pool. The corresponding activity record structure + is CUpti_ActivityMemoryPool2. +
+
CUPTI_ACTIVITY_KIND_GRAPH_TRACE = 51
+
The corresponding activity record structure is CUpti_ActivityGraphTrace. +
+
CUPTI_ACTIVITY_KIND_JIT = 52
+
JIT operation tracking. The corresponding activity record structure is CUpti_ActivityJit. +
+
CUPTI_ACTIVITY_KIND_COUNT
+
+
CUPTI_ACTIVITY_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityLaunchType
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_LAUNCH_TYPE_REGULAR = 0
+
The kernel was launched via a regular kernel call
+
CUPTI_ACTIVITY_LAUNCH_TYPE_COOPERATIVE_SINGLE_DEVICE = 1
+
The kernel was launched via API cudaLaunchCooperativeKernel() or cuLaunchCooperativeKernel()
+
CUPTI_ACTIVITY_LAUNCH_TYPE_COOPERATIVE_MULTI_DEVICE = 2
+
The kernel was launched via API cudaLaunchCooperativeKernelMultiDevice() or cuLaunchCooperativeKernelMultiDevice()
+
+
+
+
+ enum CUpti_ActivityMemcpyKind
+
+
+

Each kind represents the source and destination targets of a memory copy. Targets are host, device, and array.

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_MEMCPY_KIND_UNKNOWN = 0
+
The memory copy kind is not known.
+
CUPTI_ACTIVITY_MEMCPY_KIND_HTOD = 1
+
A host to device memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_DTOH = 2
+
A device to host memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_HTOA = 3
+
A host to device array memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_ATOH = 4
+
A device array to host memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_ATOA = 5
+
A device array to device array memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_ATOD = 6
+
A device array to device memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_DTOA = 7
+
A device to device array memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_DTOD = 8
+
A device to device memory copy on the same device.
+
CUPTI_ACTIVITY_MEMCPY_KIND_HTOH = 9
+
A host to host memory copy.
+
CUPTI_ACTIVITY_MEMCPY_KIND_PTOP = 10
+
A peer to peer memory copy across different devices.
+
CUPTI_ACTIVITY_MEMCPY_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityMemoryKind
+
+
+

Each kind represents the type of the memory accessed by a memory operation/copy.

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN = 0
+
The memory kind is unknown.
+
CUPTI_ACTIVITY_MEMORY_KIND_PAGEABLE = 1
+
The memory is pageable.
+
CUPTI_ACTIVITY_MEMORY_KIND_PINNED = 2
+
The memory is pinned.
+
CUPTI_ACTIVITY_MEMORY_KIND_DEVICE = 3
+
The memory is on the device.
+
CUPTI_ACTIVITY_MEMORY_KIND_ARRAY = 4
+
The memory is an array.
+
CUPTI_ACTIVITY_MEMORY_KIND_MANAGED = 5
+
The memory is managed
+
CUPTI_ACTIVITY_MEMORY_KIND_DEVICE_STATIC = 6
+
The memory is device static
+
CUPTI_ACTIVITY_MEMORY_KIND_MANAGED_STATIC = 7
+
The memory is managed static
+
CUPTI_ACTIVITY_MEMORY_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityMemoryOperationType
+
+
+

Describes the type of memory operation, to be used with CUpti_ActivityMemory3. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_INVALID = 0
+
+
CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_ALLOCATION = 1
+
Memory is allocated.
+
CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_RELEASE = 2
+
Memory is released.
+
CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityMemoryPoolOperationType
+
+
+

Describes the type of memory pool operation, to be used with CUpti_ActivityMemoryPool2. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_INVALID = 0
+
+
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_CREATED = 1
+
Memory pool is created.
+
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_DESTROYED = 2
+
Memory pool is destroyed.
+
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED = 3
+
Memory pool is trimmed.
+
CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityMemoryPoolType
+
+
+

Describes the type of memory pool, to be used with CUpti_ActivityMemory3. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_MEMORY_POOL_TYPE_INVALID = 0
+
+
CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL = 1
+
Memory pool is local to the process.
+
CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED = 2
+
Memory pool is imported by the process.
+
CUPTI_ACTIVITY_MEMORY_POOL_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityObjectKind
+
+
+

See also:

+

CUpti_ActivityObjectKindId

+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_OBJECT_UNKNOWN = 0
+
The object kind is not known.
+
CUPTI_ACTIVITY_OBJECT_PROCESS = 1
+
A process.
+
CUPTI_ACTIVITY_OBJECT_THREAD = 2
+
A thread.
+
CUPTI_ACTIVITY_OBJECT_DEVICE = 3
+
A device.
+
CUPTI_ACTIVITY_OBJECT_CONTEXT = 4
+
A context.
+
CUPTI_ACTIVITY_OBJECT_STREAM = 5
+
A stream.
+
CUPTI_ACTIVITY_OBJECT_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityOverheadKind
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_OVERHEAD_UNKNOWN = 0
+
The overhead kind is not known.
+
CUPTI_ACTIVITY_OVERHEAD_DRIVER_COMPILER = 1
+
Compiler(JIT) overhead.
+
CUPTI_ACTIVITY_OVERHEAD_CUPTI_BUFFER_FLUSH = 1<<16
+
Activity buffer flush overhead.
+
CUPTI_ACTIVITY_OVERHEAD_CUPTI_INSTRUMENTATION = 2<<16
+
CUPTI instrumentation overhead.
+
CUPTI_ACTIVITY_OVERHEAD_CUPTI_RESOURCE = 3<<16
+
CUPTI resource creation and destruction overhead.
+
CUPTI_ACTIVITY_OVERHEAD_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityPCSamplingPeriod
+
+
+

Sampling period can be set using cuptiActivityConfigurePCSampling

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_INVALID = 0
+
The PC sampling period is not set.
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MIN = 1
+
Minimum sampling period available on the device.
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_LOW = 2
+
Sampling period in lower range.
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MID = 3
+
Medium sampling period.
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_HIGH = 4
+
Sampling period in higher range.
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_MAX = 5
+
Maximum sampling period available on the device.
+
CUPTI_ACTIVITY_PC_SAMPLING_PERIOD_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityPCSamplingStallReason
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID = 0
+
Invalid reason
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE = 1
+
No stall, instruction is selected for issue
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH = 2
+
Warp is blocked because next instruction is not yet available, because of instruction cache miss, or because of branching + effects +
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY = 3
+
Instruction is waiting on an arithmetic dependency
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY = 4
+
Warp is blocked because it is waiting for a memory access to complete.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE = 5
+
Texture sub-system is fully utilized or has too many outstanding requests.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC = 6
+
Warp is blocked as it is waiting at __syncthreads() or at memory barrier.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY = 7
+
Warp is blocked waiting for __constant__ memory and immediate memory access to complete.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY = 8
+
Compute operation cannot be performed due to the required resources not being available.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE = 9
+
Warp is blocked because there are too many pending memory operations. In Kepler architecture it often indicates high number + of memory replays. +
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED = 10
+
Warp was ready to issue, but some other warp issued instead.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER = 11
+
Miscellaneous reasons
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_SLEEPING = 12
+
Sleeping.
+
CUPTI_ACTIVITY_PC_SAMPLING_STALL_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityPartitionedGlobalCacheConfig
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_UNKNOWN = 0
+
Partitioned global cache config unknown.
+
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_NOT_SUPPORTED = 1
+
Partitioned global cache not supported.
+
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_OFF = 2
+
Partitioned global cache config off.
+
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_ON = 3
+
Partitioned global cache config on.
+
CUPTI_ACTIVITY_PARTITIONED_GLOBAL_CACHE_CONFIG_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityPreemptionKind
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_PREEMPTION_KIND_UNKNOWN = 0
+
The preemption kind is not known.
+
CUPTI_ACTIVITY_PREEMPTION_KIND_SAVE = 1
+
Preemption to save CDP block.
+
CUPTI_ACTIVITY_PREEMPTION_KIND_RESTORE = 2
+
Preemption to restore CDP block.
+
CUPTI_ACTIVITY_PREEMPTION_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityStreamFlag
+
+
+

The types of stream to be used with CUpti_ActivityStream. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_UNKNOWN = 0
+
Unknown data.
+
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_DEFAULT = 1
+
Default stream.
+
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_NON_BLOCKING = 2
+
Non-blocking stream.
+
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_NULL = 3
+
Null stream.
+
CUPTI_ACTIVITY_STREAM_CREATE_MASK = 0xFFFF
+
Stream create Mask
+
CUPTI_ACTIVITY_STREAM_CREATE_FLAG_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivitySynchronizationType
+
+
+

The types of synchronization to be used with CUpti_ActivitySynchronization. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_UNKNOWN = 0
+
Unknown data.
+
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_EVENT_SYNCHRONIZE = 1
+
Event synchronize API.
+
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_WAIT_EVENT = 2
+
Stream wait event API.
+
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_SYNCHRONIZE = 3
+
Stream synchronize API.
+
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_CONTEXT_SYNCHRONIZE = 4
+
Context synchronize API.
+
CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityThreadIdType
+
+
+

CUPTI uses different methods to obtain the thread-id depending on the support and the underlying platform. This enum documents + these methods for each type. APIs cuptiSetThreadIdType and cuptiGetThreadIdType can be used to set and get the thread-id type. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_THREAD_ID_TYPE_DEFAULT = 0
+
Default type. Windows uses API GetCurrentThreadId(); Linux/Mac/Android/QNX use POSIX pthread API pthread_self().
+
CUPTI_ACTIVITY_THREAD_ID_TYPE_SYSTEM = 1
+
This type is based on the system API available on the underlying platform, and the thread-id obtained is supposed to be unique + for the process lifetime. Windows uses API GetCurrentThreadId(); Linux uses syscall SYS_gettid; Mac uses syscall SYS_thread_selfid; + Android/QNX use gettid(). +
+
CUPTI_ACTIVITY_THREAD_ID_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityUnifiedMemoryAccessType
+
+ +
+
+ Values + +
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_UNKNOWN = 0
+
The unified memory access type is not known
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_READ = 1
+
The page fault was triggered by read memory instruction
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_WRITE = 2
+
The page fault was triggered by write memory instruction
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_ATOMIC = 3
+
The page fault was triggered by atomic memory instruction
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_ACCESS_TYPE_PREFETCH = 4
+
The page fault was triggered by memory prefetch operation
+
+
+
+
+ enum CUpti_ActivityUnifiedMemoryCounterKind
+
+
+

Many activities are associated with the Unified Memory mechanism; among them are transfers from host to device, device to host, + and page faults at the host side. +

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_UNKNOWN = 0
+
The unified memory counter kind is not known.
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD = 1
+
Number of bytes transferred from host to device
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH = 2
+
Number of bytes transferred from device to host
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT = 3
+
Number of CPU page faults, this is only supported on 64 bit Linux and Mac platforms
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT = 4
+
Number of GPU page faults, this is only supported on devices with compute capability 6.0 and higher and 64 bit Linux platforms + +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING = 5
+
Thrashing occurs when data is frequently accessed by multiple processors and has to be constantly migrated around to achieve + data locality. In this case the overhead of migration may exceed the benefits of locality. This is only supported on 64 bit + Linux platforms. +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING = 6
+
Throttling is a prevention technique used by the driver to avoid further thrashing. Here, the driver doesn't service the fault + for one of the contending processors for a specific period of time, so that the other processor can run at full-speed. This + is only supported on 64 bit Linux platforms. +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP = 7
+
In case throttling does not help, the driver tries to pin the memory to a processor for a specific period of time. One of + the contending processors will have slow access to the memory, while the other will have fast access. This is only supported + on 64 bit Linux platforms. +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOD = 8
+
Number of bytes transferred from one device to another device. This is only supported on 64 bit Linux platforms.
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_COUNT
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityUnifiedMemoryCounterScope
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_UNKNOWN = 0
+
The unified memory counter scope is not known.
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE = 1
+
Collect unified memory counter for single process on one device
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_ALL_DEVICES = 2
+
Collect unified memory counter for single process across all devices
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_COUNT
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ActivityUnifiedMemoryMigrationCause
+
+ +
+
+ Values + +
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_UNKNOWN = 0
+
The unified memory migration cause is not known
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_USER = 1
+
The unified memory migrated due to an explicit call from the user e.g. cudaMemPrefetchAsync
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_COHERENCE = 2
+
The unified memory migrated to guarantee data coherence e.g. CPU/GPU faults on Pascal+ and kernel launch on pre-Pascal GPUs + +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_PREFETCH = 3
+
The unified memory was speculatively migrated by the UVM driver before being accessed by the destination processor to improve + performance +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_EVICTION = 4
+
The unified memory migrated to the CPU because it was evicted to make room for another block of memory on the GPU
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_MIGRATION_CAUSE_ACCESS_COUNTERS = 5
+
The unified memory migrated to another processor because of access counter notifications. Only frequently accessed pages are + migrated between CPU and GPU, or between peer GPUs. +
+
+
+
+
+ enum CUpti_ActivityUnifiedMemoryRemoteMapCause
+
+ +
+
+ Values + +
+
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_UNKNOWN = 0
+
The cause of mapping to remote memory was unknown
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_COHERENCE = 1
+
Mapping to remote memory was added to maintain data coherence.
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_THRASHING = 2
+
Mapping to remote memory was added to prevent further thrashing
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_POLICY = 3
+
Mapping to remote memory was added to enforce the hints specified by the programmer or by performance heuristics of the UVM + driver +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_OUT_OF_MEMORY = 4
+
Mapping to remote memory was added because there is no more memory available on the processor and eviction was not possible + +
+
CUPTI_ACTIVITY_UNIFIED_MEMORY_REMOTE_MAP_CAUSE_EVICTION = 5
+
Mapping to remote memory was added after the memory was evicted to make room for another block of memory on the GPU
+
+
+
+
+ enum CUpti_DevType
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_DEV_TYPE_INVALID = 0
+
+
CUPTI_DEV_TYPE_GPU = 1
+
The device type is GPU.
+
CUPTI_DEV_TYPE_NPU = 2
+
The device type is NVLink processing unit in CPU.
+
CUPTI_DEV_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_DeviceVirtualizationMode
+
+
+

This indicates the virtualization mode in which CUDA device is running

+
+
+
+ Values + +
+
+
CUPTI_DEVICE_VIRTUALIZATION_MODE_NONE = 0
+
No virtualization mode is associated with the device i.e. it's a baremetal GPU
+
CUPTI_DEVICE_VIRTUALIZATION_MODE_PASS_THROUGH = 1
+
The device is associated with the pass-through GPU. In this mode, an entire physical GPU is directly assigned to one virtual + machine (VM). +
+
CUPTI_DEVICE_VIRTUALIZATION_MODE_VIRTUAL_GPU = 2
+
The device is associated with the virtual GPU (vGPU). In this mode multiple virtual machines (VMs) have simultaneous, direct + access to a single physical GPU. +
+
CUPTI_DEVICE_VIRTUALIZATION_MODE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EnvironmentClocksThrottleReason
+
+
+

The possible reasons that a clock can be throttled. There can be more than one reason that a clock is being throttled so these + types can be combined by bitwise OR. These are used in the clocksThrottleReason field in the Environment Activity Record. + +

+
+
+
+ Values + +
+
+
CUPTI_CLOCKS_THROTTLE_REASON_GPU_IDLE = 0x00000001
+
Nothing is running on the GPU and the clocks are dropping to idle state.
+
CUPTI_CLOCKS_THROTTLE_REASON_USER_DEFINED_CLOCKS = 0x00000002
+
The GPU clocks are limited by a user specified limit.
+
CUPTI_CLOCKS_THROTTLE_REASON_SW_POWER_CAP = 0x00000004
+
A software power scaling algorithm is reducing the clocks below requested clocks.
+
CUPTI_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN = 0x00000008
+
Hardware slowdown to reduce the clock by a factor of two or more is engaged. This is an indicator of one of the following: + 1) Temperature is too high, 2) External power brake assertion is being triggered (e.g. by the system power supply), 3) Change + in power state. +
+
CUPTI_CLOCKS_THROTTLE_REASON_UNKNOWN = 0x80000000
+
Some unspecified factor is reducing the clocks.
+
CUPTI_CLOCKS_THROTTLE_REASON_UNSUPPORTED = 0x40000000
+
Throttle reason is not supported for this GPU.
+
CUPTI_CLOCKS_THROTTLE_REASON_NONE = 0x00000000
+
No clock throttling.
+
CUPTI_CLOCKS_THROTTLE_REASON_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ExternalCorrelationKind
+
+
+

Custom correlation kinds are reserved for usage in external tools.

+

See also:

+

CUpti_ActivityExternalCorrelation

+

+
+
+
+ Values + +
+
+
CUPTI_EXTERNAL_CORRELATION_KIND_INVALID = 0
+
+
CUPTI_EXTERNAL_CORRELATION_KIND_UNKNOWN = 1
+
The external API is unknown to CUPTI
+
CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC = 2
+
The external API is OpenACC
+
CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0 = 3
+
The external API is custom0
+
CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1 = 4
+
The external API is custom1
+
CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM2 = 5
+
The external API is custom2
+
CUPTI_EXTERNAL_CORRELATION_KIND_SIZE
+
Add new kinds before this line
+
CUPTI_EXTERNAL_CORRELATION_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_FuncShmemLimitConfig
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_FUNC_SHMEM_LIMIT_DEFAULT = 0x00
+
+
CUPTI_FUNC_SHMEM_LIMIT_OPTIN = 0x01
+
+
CUPTI_FUNC_SHMEM_LIMIT_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_LinkFlag
+
+
+

Describes link properties, to be used with CUpti_ActivityNvLink. +

+
+
+
+ Values + +
+
+
CUPTI_LINK_FLAG_INVALID = 0
+
+
CUPTI_LINK_FLAG_PEER_ACCESS = (1<<1)
+
Is peer to peer access supported by this link.
+
CUPTI_LINK_FLAG_SYSMEM_ACCESS = (1<<2)
+
Is system memory access supported by this link.
+
CUPTI_LINK_FLAG_PEER_ATOMICS = (1<<3)
+
Is peer atomic access supported by this link.
+
CUPTI_LINK_FLAG_SYSMEM_ATOMICS = (1<<4)
+
Is system memory atomic access supported by this link.
+
CUPTI_LINK_FLAG_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_OpenAccConstructKind
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_UNKNOWN = 0
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_PARALLEL = 1
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_KERNELS = 2
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_LOOP = 3
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_DATA = 4
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_ENTER_DATA = 5
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_EXIT_DATA = 6
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_HOST_DATA = 7
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_ATOMIC = 8
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_DECLARE = 9
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_INIT = 10
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_SHUTDOWN = 11
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_SET = 12
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_UPDATE = 13
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_ROUTINE = 14
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_WAIT = 15
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_RUNTIME_API = 16
+
+
CUPTI_OPENACC_CONSTRUCT_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_OpenAccEventKind
+
+
+

See also:

+

CUpti_ActivityKindOpenAcc

+

+
+
+
+ Values + +
+
+
CUPTI_OPENACC_EVENT_KIND_INVALID = 0
+
+
CUPTI_OPENACC_EVENT_KIND_DEVICE_INIT = 1
+
+
CUPTI_OPENACC_EVENT_KIND_DEVICE_SHUTDOWN = 2
+
+
CUPTI_OPENACC_EVENT_KIND_RUNTIME_SHUTDOWN = 3
+
+
CUPTI_OPENACC_EVENT_KIND_ENQUEUE_LAUNCH = 4
+
+
CUPTI_OPENACC_EVENT_KIND_ENQUEUE_UPLOAD = 5
+
+
CUPTI_OPENACC_EVENT_KIND_ENQUEUE_DOWNLOAD = 6
+
+
CUPTI_OPENACC_EVENT_KIND_WAIT = 7
+
+
CUPTI_OPENACC_EVENT_KIND_IMPLICIT_WAIT = 8
+
+
CUPTI_OPENACC_EVENT_KIND_COMPUTE_CONSTRUCT = 9
+
+
CUPTI_OPENACC_EVENT_KIND_UPDATE = 10
+
+
CUPTI_OPENACC_EVENT_KIND_ENTER_DATA = 11
+
+
CUPTI_OPENACC_EVENT_KIND_EXIT_DATA = 12
+
+
CUPTI_OPENACC_EVENT_KIND_CREATE = 13
+
+
CUPTI_OPENACC_EVENT_KIND_DELETE = 14
+
+
CUPTI_OPENACC_EVENT_KIND_ALLOC = 15
+
+
CUPTI_OPENACC_EVENT_KIND_FREE = 16
+
+
CUPTI_OPENACC_EVENT_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_PcieDeviceType
+
+
+

Field to differentiate whether PCIE Activity record is of a GPU or a PCI Bridge

+
+
+
+ Values + +
+
+
CUPTI_PCIE_DEVICE_TYPE_GPU = 0
+
PCIE GPU record
+
CUPTI_PCIE_DEVICE_TYPE_BRIDGE = 1
+
PCIE Bridge record
+
CUPTI_PCIE_DEVICE_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_PcieGen
+
+
+

Enumeration of PCIE Generation for pcie activity attribute pcieGeneration

+
+
+
+ Values + +
+
+
CUPTI_PCIE_GEN_GEN1 = 1
+
PCIE Generation 1
+
CUPTI_PCIE_GEN_GEN2 = 2
+
PCIE Generation 2
+
CUPTI_PCIE_GEN_GEN3 = 3
+
PCIE Generation 3
+
CUPTI_PCIE_GEN_GEN4 = 4
+
PCIE Generation 4
+
CUPTI_PCIE_GEN_GEN5 = 5
+
PCIE Generation 5
+
CUPTI_PCIE_GEN_FORCE_INT = 0x7fffffff
+
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiActivityConfigurePCSampling ( CUcontext ctx, CUpti_ActivityPCSamplingConfig* config )
+
+
Set PC sampling configuration.
+
+
+ Parameters + +
+
+
ctx
+
The context
+
config
+
A pointer to CUpti_ActivityPCSamplingConfig structure containing PC sampling configuration. +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if this api is called while some valid event collection method is set.

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if config is NULL or any parameter in the config structures is not a valid value +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    Indicates that the system/device does not support PC sampling

    +
  • +
+

+
+
+
Description
+

For Pascal and older GPU architectures this API must be called before enabling activity kind CUPTI_ACTIVITY_KIND_PC_SAMPLING. + There is no such requirement for Volta and newer GPU architectures. +

+

For Volta and newer GPU architectures if this API is called in the middle of execution, PC sampling configuration will be + updated for subsequent kernel launches. +

+

+
+
+
+ CUptiResult cuptiActivityConfigureUnifiedMemoryCounter ( CUpti_ActivityUnifiedMemoryCounterConfig* config, uint32_t count )
+
+
Set Unified Memory Counter configuration.
+
+
+ Parameters + +
+
+
config
+
A pointer to CUpti_ActivityUnifiedMemoryCounterConfig structures containing Unified Memory counter configuration. +
+
count
+
Number of Unified Memory counter configuration structures
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if config is NULL or any parameter in the config structures is not a valid value +

    +
  • +
  • CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED +

    One potential reason is that platform (OS/arch) does not support the unified memory counters

    +
  • +
  • CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE +

    Indicates that the device does not support the unified memory counters

    +
  • +
  • CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES +

    Indicates that multi-GPU configuration without P2P support between any pair of devices does not support the unified memory + counters +

    +
  • +
+

+
+
+
Description
+

+
+
+
+ CUptiResult cuptiActivityDisable ( CUpti_ActivityKind kind )
+
+
Disable collection of a specific kind of activity record.
+
+
+ Parameters + +
+
+
kind
+
The kind of activity record to stop collecting
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_KIND +

    if the activity kind is not supported

    +
  • +
+

+
+
+
Description
+

Disable collection of a specific kind of activity record. Multiple kinds can be disabled by calling this function multiple + times. By default all activity kinds are disabled for collection. +

+

+
+
+
+ CUptiResult cuptiActivityDisableContext ( CUcontext context, CUpti_ActivityKind kind )
+
+
Disable collection of a specific kind of activity record for a context.
+
+
+ Parameters + +
+
+
context
+
The context for which activity is to be disabled
+
kind
+
The kind of activity record to stop collecting
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_KIND +

    if the activity kind is not supported

    +
  • +
+

+
+
+
Description
+

Disable collection of a specific kind of activity record for a context. This setting done by this API will supersede the global + settings for activity records. Multiple kinds can be disabled by calling this function multiple times. +

+

+
+
+
+ CUptiResult cuptiActivityEnable ( CUpti_ActivityKind kind )
+
+
Enable collection of a specific kind of activity record.
+
+
+ Parameters + +
+
+
kind
+
The kind of activity record to collect
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if the activity kind cannot be enabled

    +
  • +
  • CUPTI_ERROR_INVALID_KIND +

    if the activity kind is not supported

    +
  • +
+

+
+
+
Description
+

Enable collection of a specific kind of activity record. Multiple kinds can be enabled by calling this function multiple times. + By default all activity kinds are disabled for collection. +

+

+
+
+
+ CUptiResult cuptiActivityEnableAndDump ( CUpti_ActivityKind kind )
+
+
Enable collection of a specific kind of activity record. For certain activity kinds it dumps existing records.
+
+
+ Parameters + +
+
+
kind
+
The kind of activity record to collect
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_UNKNOWN +

    if buffer is not initialized.

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if the activity kind cannot be enabled

    +
  • +
  • CUPTI_ERROR_INVALID_KIND +

    if the activity kind is not supported

    +
  • +
+

+
+
+
Description
+

In general, the behavior of this API is similar to the API cuptiActivityEnable, i.e. it enables the collection of a specific kind of activity record. Additionally, this API can help in dumping the records + for activities which happened in the past before enabling the corresponding activity kind. The API allows the user to get records for + the current resource allocations done in CUDA. For CUPTI_ACTIVITY_KIND_DEVICE, existing device records are dumped. For CUPTI_ACTIVITY_KIND_CONTEXT, + existing context records are dumped. For CUPTI_ACTIVITY_KIND_STREAM, existing stream records are dumped. For CUPTI_ACTIVITY_KIND_NVLINK, + existing NVLINK records are dumped. For CUPTI_ACTIVITY_KIND_PCIE, existing PCIE records are dumped. For other activities, + the behavior is similar to the API cuptiActivityEnable.

+

Device records are emitted in CUPTI on CUDA driver initialization. Those records can only be retrieved by the user if CUPTI + is attached before CUDA initialization. Context and stream records are emitted on context and stream creation. The use case + of the API is to provide the records for CUDA resources (contexts/streams/devices) that are currently active if the user attaches + CUPTI late. +

+

Before calling this function, the user must register buffer callbacks to get the activity records by calling cuptiActivityRegisterCallbacks. If the user does not register the buffers and calls API cuptiActivityEnableAndDump, then CUPTI will enable the activity kind but not provide any records for that activity kind. +

+

+
+
+
+ CUptiResult cuptiActivityEnableContext ( CUcontext context, CUpti_ActivityKind kind )
+
+
Enable collection of a specific kind of activity record for a context.
+
+
+ Parameters + +
+
+
context
+
The context for which activity is to be enabled
+
kind
+
The kind of activity record to collect
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if the activity kind cannot be enabled

    +
  • +
  • CUPTI_ERROR_INVALID_KIND +

    if the activity kind is not supported

    +
  • +
+

+
+
+
Description
+

Enable collection of a specific kind of activity record for a context. This setting done by this API will supersede the global + settings for activity records enabled by cuptiActivityEnable. Multiple kinds can be enabled by calling this function multiple times. +

+

+
+
+
+ CUptiResult cuptiActivityEnableLatencyTimestamps ( uint8_t enable )
+
+
Controls the collection of queued and submitted timestamps for kernels.
+
+
+ Parameters + +
+
+
enable
+
is a boolean, denoting whether these timestamps should be collected
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
+

+
+
+
Description
+

This API is used to control the collection of queued and submitted timestamps for kernels whose records are provided through + the struct CUpti_ActivityKernel8. Default value is 0, i.e. these timestamps are not collected. This API needs to be called before initialization of CUDA and + this setting should not be changed during the profiling session. +

+

+
+
+
+ CUptiResult cuptiActivityEnableLaunchAttributes ( uint8_t enable )
+
+
Controls the collection of launch attributes for kernels.
+
+
+ Parameters + +
+
+
enable
+
is a boolean denoting whether these launch attributes should be collected
+
+
+
+
Description
+

This API is used to control the collection of launch attributes for kernels whose records are provided through the struct + CUpti_ActivityKernel8. Default value is 0, i.e. these attributes are not collected. +

+

+
+
+
+ CUptiResult cuptiActivityFlush ( CUcontext context, uint32_t streamId, uint32_t flag )
+
+
Wait for all activity records to be delivered via the completion callback.
+
+
+ Parameters + +
+
+
context
+
A valid CUcontext or NULL.
+
streamId
+
The stream ID.
+
flag
+
The flag can be set to indicate a forced flush. See CUpti_ActivityFlag
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if not preceded by a successful call to cuptiActivityRegisterCallbacks

    +
  • +
  • CUPTI_ERROR_UNKNOWN +

    an internal error occurred

    +
  • +
+

+
+
+
Description
+

This function does not return until all activity records associated with the specified context/stream are returned to the + CUPTI client using the callback registered in cuptiActivityRegisterCallbacks. To ensure that all activity records are complete, + the requested stream(s), if any, are synchronized. +

+

If context is NULL, the global activity records (i.e. those not associated with a particular stream) are flushed (in this case no streams + are synchronized). If context is a valid CUcontext and streamId is 0, the buffers of all streams of this context are flushed. Otherwise, the buffers of the specified stream in this context + are flushed. +

+

Before calling this function, the buffer handling callback api must be activated by calling cuptiActivityRegisterCallbacks.

+

+ + **DEPRECATED** This method is deprecated; CONTEXT and STREAMID will be ignored. Use cuptiActivityFlushAll to flush all data. + +

+
+
+
+ CUptiResult cuptiActivityFlushAll ( uint32_t flag )
+
+
Request to deliver activity records via the buffer completion callback.
+
+
+ Parameters + +
+
+
flag
+
The flag can be set to indicate a forced flush. See CUpti_ActivityFlag
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if not preceded by a successful call to cuptiActivityRegisterCallbacks

    +
  • +
  • CUPTI_ERROR_UNKNOWN +

    an internal error occurred

    +
  • +
+

+
+
+
Description
+

This function returns the activity records associated with all contexts/streams (and the global buffers not associated with + any stream) to the CUPTI client using the callback registered in cuptiActivityRegisterCallbacks. +

+

This is a blocking call but it doesn't issue any CUDA synchronization calls implicitly thus it's not guaranteed that all activities + are completed on the underlying devices. Activity record is considered as completed if it has all the information filled up + including the timestamps if any. It is the client's responsibility to issue necessary CUDA synchronization calls before calling + this function if all activity records with complete information are expected to be delivered. +

+

Behavior of the function based on the input flag: +

    +
  • +

    For default flush i.e. when flag is set as 0, it returns all the activity buffers which have all the activity records completed; + buffers need not be full though. It doesn't return buffers which have one or more incomplete records. Default flush can + be done at a regular interval in a separate thread. +

    +
  • +
  • +

    For forced flush i.e. when flag CUPTI_ACTIVITY_FLAG_FLUSH_FORCED is passed to the function, it returns all the activity buffers + including the ones which have one or more incomplete activity records. It's suggested for clients to do the force flush before + the termination of the profiling session to allow remaining buffers to be delivered. In general, it can be done in the at-exit + handler. +

    +
  • +
+

+

Before calling this function, the buffer handling callback api must be activated by calling cuptiActivityRegisterCallbacks.

+

+

See also:

+

cuptiActivityFlushPeriod

+

+
+
+
+ CUptiResult cuptiActivityFlushPeriod ( uint32_t time )
+
+
Sets the flush period for the worker thread.
+
+
+ Parameters + +
+
+
time
+
flush period in msec
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
+

+
+
+
Description
+

CUPTI creates a worker thread to minimize the perturbance for the application created threads. CUPTI offloads certain operations + from the application threads to the worker thread, this includes synchronization of profiling resources between host and device, + delivery of the activity buffers to the client using the callback registered in cuptiActivityRegisterCallbacks. For performance + reasons, CUPTI wakes up the worker thread based on certain heuristics. +

+

This API is used to control the flush period of the worker thread. This setting will override the CUPTI heuristics. Setting + time to zero disables the periodic flush and restores the default behavior. +

+

Periodic flush can return only those activity buffers which are full and have all the activity records completed.

+

It's allowed to use the API cuptiActivityFlushAll to flush the data on-demand, even when client sets the periodic flush. +

+

+

See also:

+

cuptiActivityFlushAll

+

+
+
+
+ CUptiResult cuptiActivityGetAttribute ( CUpti_ActivityAttribute attr, size_t* valueSize, void* value )
+
+
Read an activity API attribute.
+
+
+ Parameters + +
+
+
attr
+
The attribute to read
+
valueSize
+
Size of the buffer pointed to by value; on return, holds the number of bytes written to value
+
value
+
Returns the value of the attribute
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attr is not an activity attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    Indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Read an activity API attribute and return it in *value. +

+

+
+
+
+ CUptiResult cuptiActivityGetNextRecord ( uint8_t* buffer, size_t validBufferSizeBytes, CUpti_Activity** record )
+
+
Iterate over the activity records in a buffer.
+
+
+ Parameters + +
+
+
buffer
+
The buffer containing activity records
+
validBufferSizeBytes
+
The number of valid bytes in the buffer.
+
record
+
Inputs the previous record returned by cuptiActivityGetNextRecord and returns the next activity record from the buffer. If + input value is NULL, returns the first activity record in the buffer. Records of kind CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL + may contain invalid (0) timestamps, indicating that no timing information could be collected for lack of device memory. +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_MAX_LIMIT_REACHED +

    if no more records in the buffer

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if buffer is NULL. +

    +
  • +
+

+
+
+
Description
+

This is a helper function to iterate over the activity records in a buffer. A buffer of activity records is typically obtained + by receiving a CUpti_BuffersCallbackCompleteFunc callback. +

+

An example of typical usage:

CUpti_Activity *record = NULL;
+       CUptiResult status = CUPTI_SUCCESS;
+         do {
+            status = cuptiActivityGetNextRecord(buffer, validSize, &record);
+            if(status == CUPTI_SUCCESS) {
+                 // Use record here...
+            }
+            else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED)
+                break;
+            else {
+                goto Error;
+            }
+          } while (1);

+

+
+
+
+ CUptiResult cuptiActivityGetNumDroppedRecords ( CUcontext context, uint32_t streamId, size_t* dropped )
+
+
Get the number of activity records that were dropped because of insufficient buffer space.
+
+
+ Parameters + +
+
+
context
+
The context, or NULL to get dropped count from global queue
+
streamId
+
The stream ID
+
dropped
+
The number of records that were dropped since the last call to this function.
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if dropped is NULL +

    +
  • +
+

+
+
+
Description
+

Get the number of records that were dropped because of insufficient buffer space. The dropped count includes records that + could not be recorded because CUPTI did not have activity buffer space available for the record (because the CUpti_BuffersCallbackRequestFunc + callback did not return an empty buffer of sufficient size) and also CDP records that could not be recorded because the device-size + buffer was full (size is controlled by the CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP attribute). The dropped count maintained + for the queue is reset to zero when this function is called. +

+

+
+
+
+ CUptiResult cuptiActivityPopExternalCorrelationId ( CUpti_ExternalCorrelationKind kind, uint64_t* lastId )
+
+
Pop an external correlation id for the calling thread.
+
+
+ Parameters + +
+
+
kind
+
The kind of external API activities should be correlated with.
+
lastId
+
If the function returns successfully, contains the last external correlation id for this kind; can be NULL. +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    The external API kind is invalid.

    +
  • +
  • CUPTI_ERROR_QUEUE_EMPTY +

    No external id is currently associated with kind. +

    +
  • +
+

+
+
+
Description
+

This function notifies CUPTI that the calling thread is leaving an external API region.

+

+
+
+
+ CUptiResult cuptiActivityPushExternalCorrelationId ( CUpti_ExternalCorrelationKind kind, uint64_t id )
+
+
Push an external correlation id for the calling thread.
+
+
+ Parameters + +
+
+
kind
+
The kind of external API activities should be correlated with.
+
id
+
External correlation id.
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    The external API kind is invalid

    +
  • +
+

+
+
+
Description
+

This function notifies CUPTI that the calling thread is entering an external API region. When a CUPTI activity API record + is created while within an external API region and CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION is enabled, the activity API record + will be preceded by a CUpti_ActivityExternalCorrelation record for each CUpti_ExternalCorrelationKind. +

+

+
+
+
+ CUptiResult cuptiActivityRegisterCallbacks ( CUpti_BuffersCallbackRequestFunc funcBufferRequested, CUpti_BuffersCallbackCompleteFunc funcBufferCompleted )
+
+
Registers callback functions with CUPTI for activity buffer handling.
+
+
+ Parameters + +
+
+
funcBufferRequested
+
callback which is invoked when an empty buffer is requested by CUPTI
+
funcBufferCompleted
+
callback which is invoked when a buffer containing activity records is available from CUPTI
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if either funcBufferRequested or funcBufferCompleted is NULL +

    +
  • +
+

+
+
+
Description
+

This function registers two callback functions to be used in asynchronous buffer handling. If registered, activity record + buffers are handled using asynchronous requested/completed callbacks from CUPTI. +

+

Registering these callbacks prevents the client from using CUPTI's blocking enqueue/dequeue functions.

+

+
+
+
+ CUptiResult cuptiActivityRegisterTimestampCallback ( CUpti_TimestampCallbackFunc funcTimestamp )
+
+
Registers callback function with CUPTI for providing timestamp.
+
+
+ Parameters + +
+
+
funcTimestamp
+
callback which is invoked when a timestamp is needed by CUPTI
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if funcTimestamp is NULL +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
+

+
+
+
Description
+

This function registers a callback function to obtain timestamp of user's choice instead of using CUPTI provided timestamp. + By default CUPTI uses different methods, based on the underlying platform, to retrieve the timestamp: Linux and Android use + clock_gettime(CLOCK_REALTIME, ..), Windows uses QueryPerformanceCounter(), Mac uses mach_absolute_time(), and QNX uses ClockCycles(). + Timestamps retrieved using these methods are converted to nanoseconds if needed before usage. +

+

The registration of timestamp callback should be done before any of the CUPTI activity kinds are enabled to make sure that + all the records report the timestamp using the callback function registered through cuptiActivityRegisterTimestampCallback + API. +

+

Changing the timestamp callback function in CUPTI through cuptiActivityRegisterTimestampCallback API in the middle of the + profiling session can cause records generated prior to the change to report timestamps through previous timestamp method. +

+

+
+
+
+ CUptiResult cuptiActivitySetAttribute ( CUpti_ActivityAttribute attr, size_t* valueSize, void* value )
+
+
Write an activity API attribute.
+
+
+ Parameters + +
+
+
attr
+
The attribute to write
+
valueSize
+
The size, in bytes, of the value
+
value
+
The attribute value to write
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attr is not an activity attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    Indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Write an activity API attribute.

+

+
+
+
+ CUptiResult cuptiComputeCapabilitySupported ( int  major, int  minor, int* support )
+
+
Check support for a compute capability.
+
+
+ Parameters + +
+
+
major
+
The major revision number of the compute capability
+
minor
+
The minor revision number of the compute capability
+
support
+
Pointer to an integer to return the support status
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if support is NULL +

    +
  • +
+

+
+
+
Description
+

This function is used to check the support for a device based on its compute capability. It sets the support when the compute capability is supported by the current version of CUPTI, and clears it otherwise. This version of CUPTI + might not support all GPUs sharing the same compute capability. It is suggested to use API cuptiDeviceSupported which provides correct information. +

+

+

See also:

+

cuptiDeviceSupported

+

+
+
+
+ CUptiResult cuptiDeviceSupported ( CUdevice dev, int* support )
+
+
Check support for a compute device.
+
+
+ Parameters + +
+
+
dev
+
The device handle returned by CUDA Driver API cuDeviceGet
+
support
+
Pointer to an integer to return the support status
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if support is NULL +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    if dev is not a valid device +

    +
  • +
+

+
+
+
Description
+

This function is used to check the support for a compute device. It sets the support when the device is supported by the current version of CUPTI, and clears it otherwise. +

+

+

See also:

+

cuptiComputeCapabilitySupported

+

+
+
+
+ CUptiResult cuptiDeviceVirtualizationMode ( CUdevice dev, CUpti_DeviceVirtualizationMode* mode )
+
+
Query the virtualization mode of the device.
+
+
+ Parameters + +
+
+
dev
+
The device handle returned by CUDA Driver API cuDeviceGet
+
mode
+
Pointer to a CUpti_DeviceVirtualizationMode to return the virtualization mode
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    if dev is not a valid device +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if mode is NULL +

    +
  • +
+

+
+
+
Description
+

This function is used to query the virtualization mode of the CUDA device.

+

+
+
+
+ CUptiResult cuptiFinalize ( void )
+
+
Detach CUPTI from the running process.
+
+
Description
+

This API detaches the CUPTI from the running process. It destroys and cleans up all the resources associated with CUPTI in + the current process. After CUPTI detaches from the process, the process will keep on running with no CUPTI attached to it. + For safe operation of the API, it is recommended this API is invoked from the exit callsite of any of the CUDA Driver or Runtime + API. Otherwise CUPTI client needs to make sure that required CUDA synchronization and CUPTI activity buffer flush is done + before calling the API. Sample code showing the usage of the API in the cupti callback handler code:

‎    void CUPTIAPI
+          cuptiCallbackHandler(void *userdata, CUpti_CallbackDomain domain,
+              CUpti_CallbackId cbid, void *cbdata)
+          {
+              const CUpti_CallbackData *cbInfo = (CUpti_CallbackData *)cbdata;
+      
+              // Take this code path when CUPTI detach is requested
+              if (detachCupti) {
+                  switch(domain)
+                  {
+                  case CUPTI_CB_DOMAIN_RUNTIME_API:
+                  case CUPTI_CB_DOMAIN_DRIVER_API:
+                      if (cbInfo->callbackSite == CUPTI_API_EXIT) {
+                          // call the CUPTI detach API
+                          cuptiFinalize();
+                      }
+                      break;
+                  default:
+                      break;
+                  }
+              }
+          }

+
+
+
+ CUptiResult cuptiGetAutoBoostState ( CUcontext context, CUpti_ActivityAutoBoostState* state )
+
+
Get auto boost state.
+
+
+ Parameters + +
+
+
context
+
A valid CUcontext.
+
state
+
A pointer to CUpti_ActivityAutoBoostState structure which contains the current state and the id of the process that has requested the current state +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if CUcontext or state is NULL +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    Indicates that the device does not support auto boost

    +
  • +
  • CUPTI_ERROR_UNKNOWN +

    an internal error occurred

    +
  • +
+

+
+
+
Description
+

The profiling results can be inconsistent in case auto boost is enabled. CUPTI tries to disable auto boost while profiling. + It can fail to disable in cases where user does not have the permissions or CUDA_AUTO_BOOST env variable is set. The function + can be used to query whether auto boost is enabled. +

+

+
+
+
+ CUptiResult cuptiGetContextId ( CUcontext context, uint32_t* contextId )
+
+
Get the ID of a context.
+
+
+ Parameters + +
+
+
context
+
The context
+
contextId
+
Returns a process-unique ID for the context
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    The context is NULL or not valid.

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if contextId is NULL +

    +
  • +
+

+
+
+
Description
+

Get the ID of a context.

+

+
+
+
+ CUptiResult cuptiGetDeviceId ( CUcontext context, uint32_t* deviceId )
+
+
Get the ID of a device.
+
+
+ Parameters + +
+
+
context
+
The context, or NULL to indicate the current context.
+
deviceId
+
Returns the ID of the device that is current for the calling thread.
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    if unable to get device ID

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if deviceId is NULL +

    +
  • +
+

+
+
+
Description
+

If context is NULL, returns the ID of the device that contains the currently active context. If context is non-NULL, returns the ID of the device which contains that context. Operates in a similar manner to cudaGetDevice() or + cuCtxGetDevice() but may be called from within callback functions. +

+

+
+
+
+ CUptiResult cuptiGetGraphId ( CUgraph graph, uint32_t* pId )
+
+
Get the unique ID of a graph.
+
+
+ Parameters + +
+
+
graph
+
The graph.
+
pId
+
Returns the unique ID of the graph
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if graph is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the unique ID of the CUDA graph.

+

+
+
+
+ CUptiResult cuptiGetGraphNodeId ( CUgraphNode node, uint64_t* nodeId )
+
+
Get the unique ID of a graph node.
+
+
+ Parameters + +
+
+
node
+
The graph node.
+
nodeId
+
Returns the unique ID of the node
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if node is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the unique ID of the CUDA graph node.

+

+
+
+
+ CUptiResult cuptiGetLastError ( void )
+
+
Returns the last error from a cupti call or callback.
+
+
Description
+

Returns the last error that has been produced by any of the cupti api calls or the callback in the same host thread and resets + it to CUPTI_SUCCESS. +

+
+
+
+ CUptiResult cuptiGetStreamId ( CUcontext context, CUstream stream, uint32_t* streamId )
+
+
Get the ID of a stream.
+
+
+ Parameters + +
+
+
context
+
If non-NULL then the stream is checked to ensure that it belongs to this context. Typically this parameter should be null. + +
+
stream
+
The stream
+
streamId
+
Returns a context-unique ID for the stream
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_STREAM +

    if unable to get stream ID, or if context is non-NULL and stream does not belong to the context +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if streamId is NULL +

    +
  • +
+

+
+
+
Description
+

Get the ID of a stream. The stream ID is unique within a context (i.e. all streams within a context will have unique stream + IDs). +

+

+ + **DEPRECATED** This method is deprecated as of CUDA 8.0. Use method cuptiGetStreamIdEx instead. +

+
+
+
+ CUptiResult cuptiGetStreamIdEx ( CUcontext context, CUstream stream, uint8_t perThreadStream, uint32_t* streamId )
+
+
Get the ID of a stream.
+
+
+ Parameters + +
+
+
context
+
If non-NULL then the stream is checked to ensure that it belongs to this context. Typically this parameter should be null. + +
+
stream
+
The stream
+
perThreadStream
+
Flag to indicate if program is compiled for per-thread streams
+
streamId
+
Returns a context-unique ID for the stream
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_STREAM +

    if unable to get stream ID, or if context is non-NULL and stream does not belong to the context +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if streamId is NULL +

    +
  • +
+

+
+
+
Description
+

Get the ID of a stream. The stream ID is unique within a context (i.e. all streams within a context will have unique stream + IDs). +

+

+
+
+
+ CUptiResult cuptiGetThreadIdType ( CUpti_ActivityThreadIdType* type )
+
+
Get the thread-id type.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if type is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the thread-id type used in CUPTI

+

+
+
+
+ CUptiResult cuptiGetTimestamp ( uint64_t* timestamp )
+
+
Get the CUPTI timestamp.
+
+
+ Parameters + +
+
+
timestamp
+
Returns the CUPTI timestamp
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if timestamp is NULL +

    +
  • +
+

+
+
+
Description
+

Returns a timestamp normalized to correspond with the start and end timestamps reported in the CUPTI activity records. The + timestamp is reported in nanoseconds. +

+

+
+
+
+ CUptiResult cuptiSetThreadIdType ( CUpti_ActivityThreadIdType type )
+
+
Set the thread-id type.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    if type is not supported on the platform +

    +
  • +
+

+
+
+
Description
+

CUPTI uses the method corresponding to set type to generate the thread-id. See enum CUpti_ActivityThreadIdType for the list of methods. Activity records having thread-id field contain the same value. Thread id type must not be changed + during the profiling session to avoid thread-id value mismatch across activity records. +

+

+
+
+
+
+
+
+

5.4. CUPTI Callback API

+
+

Functions, types, and enums that implement the CUPTI Callback API.

+
+

Classes

+
+
struct  +
+
Data passed into a runtime or driver API callback function.
+
struct  +
+
CUDA graphs data passed into a resource callback function.
+
struct  +
+
Module data passed into a resource callback function.
+
struct  +
+
Data passed into a NVTX callback function.
+
struct  +
+
Data passed into a resource callback function.
+
struct  +
+
Data passed into a synchronize callback function.
+
+

Typedefs

+
+
typedef + void +  ( *CUpti_CallbackFunc )( void* +  userdata,  CUpti_CallbackDomain domain,  CUpti_CallbackId cbid, const void* +  cbdata )
+
Function type for a callback.
+
typedef uint32_t  CUpti_CallbackId
+
An ID for a driver API, runtime API, resource or synchronization callback.
+
typedef CUpti_CallbackDomain* CUpti_DomainTable
+
Pointer to an array of callback domains.
+
typedef CUpti_Subscriber_st *  CUpti_SubscriberHandle
+
A callback subscriber.
+
+

Enumerations

+
+
enum CUpti_ApiCallbackSite
+
Specifies the point in an API call that a callback is issued.
+
enum CUpti_CallbackDomain
+
Callback domains.
+
enum CUpti_CallbackIdResource
+
Callback IDs for resource domain.
+
enum CUpti_CallbackIdSync
+
Callback IDs for synchronization domain.
+
+

Functions

+
+
CUptiResult cuptiEnableAllDomains ( uint32_t enable, CUpti_SubscriberHandle subscriber )
+
Enable or disable all callbacks in all domains.
+
CUptiResult cuptiEnableCallback ( uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain, CUpti_CallbackId cbid )
+
Enable or disable callbacks for a specific domain and callback ID.
+
CUptiResult cuptiEnableDomain ( uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain )
+
Enable or disable all callbacks for a specific domain.
+
CUptiResult cuptiGetCallbackName ( CUpti_CallbackDomain domain, uint32_t cbid, const char** name )
+
Get the name of a callback for a specific domain and callback ID.
+
CUptiResult cuptiGetCallbackState ( uint32_t* enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain, CUpti_CallbackId cbid )
+
Get the current enabled/disabled state of a callback for a specific domain and function ID.
+
CUptiResult cuptiSubscribe ( CUpti_SubscriberHandle* subscriber, CUpti_CallbackFunc callback, void* userdata )
+
Initialize a callback subscriber with a callback function and user data.
+
CUptiResult cuptiSupportedDomains ( size_t* domainCount, CUpti_DomainTable* domainTable )
+
Get the available callback domains.
+
CUptiResult cuptiUnsubscribe ( CUpti_SubscriberHandle subscriber )
+
Unregister a callback subscriber.
+
+
+

Typedefs

+
+
+ + void + ( *CUpti_CallbackFunc )( void* +  userdata,  CUpti_CallbackDomain domain,  CUpti_CallbackId cbid, const void* +  cbdata )
+
+
+

Function type for a callback. Function type for a callback. The type of the data passed to the callback in cbdata depends on the domain. If domain is CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API the type of cbdata will be CUpti_CallbackData. If domain is CUPTI_CB_DOMAIN_RESOURCE the type of cbdata will be CUpti_ResourceData. If domain is CUPTI_CB_DOMAIN_SYNCHRONIZE the type of cbdata will be CUpti_SynchronizeData. If domain is CUPTI_CB_DOMAIN_NVTX the type of cbdata will be CUpti_NvtxData. +

+

+
+
+
+
+ Parameters + +
+
+
userdata
+
User data supplied at subscription of the callback
+
CUpti_CallbackDomain domain
+
+
CUpti_CallbackId cbid
+
+
cbdata
+
Data passed to the callback.
+
+
+
+ typedef uint32_t CUpti_CallbackId
+
+
+

An ID for a driver API, runtime API, resource or synchronization callback. An ID for a driver API, runtime API, resource + or synchronization callback. Within a driver API callback this should be interpreted as a CUpti_driver_api_trace_cbid value + (these values are defined in cupti_driver_cbid.h). Within a runtime API callback this should be interpreted as a CUpti_runtime_api_trace_cbid + value (these values are defined in cupti_runtime_cbid.h). Within a resource API callback this should be interpreted as a CUpti_CallbackIdResource value. Within a synchronize API callback this should be interpreted as a CUpti_CallbackIdSync value. +

+
+
+
+ typedef CUpti_CallbackDomain* CUpti_DomainTable
+
+
+

Pointer to an array of callback domains.

+
+
+
+ typedef CUpti_Subscriber_st * CUpti_SubscriberHandle
+
+
+

A callback subscriber.

+
+
+
+
+
+

Enumerations

+
+
+ enum CUpti_ApiCallbackSite
+
+
+

Specifies the point in an API call that a callback is issued. This value is communicated to the callback function via CUpti_CallbackData::callbackSite. +

+
+
+
+ Values + +
+
+
CUPTI_API_ENTER = 0
+
The callback is at the entry of the API call.
+
CUPTI_API_EXIT = 1
+
The callback is at the exit of the API call.
+
CUPTI_API_CBSITE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_CallbackDomain
+
+
+

Callback domains. Each domain represents callback points for a group of related API functions or CUDA driver activity.

+
+
+
+ Values + +
+
+
CUPTI_CB_DOMAIN_INVALID = 0
+
Invalid domain.
+
CUPTI_CB_DOMAIN_DRIVER_API = 1
+
Domain containing callback points for all driver API functions.
+
CUPTI_CB_DOMAIN_RUNTIME_API = 2
+
Domain containing callback points for all runtime API functions.
+
CUPTI_CB_DOMAIN_RESOURCE = 3
+
Domain containing callback points for CUDA resource tracking.
+
CUPTI_CB_DOMAIN_SYNCHRONIZE = 4
+
Domain containing callback points for CUDA synchronization.
+
CUPTI_CB_DOMAIN_NVTX = 5
+
Domain containing callback points for NVTX API functions.
+
CUPTI_CB_DOMAIN_SIZE = 6
+
+
CUPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_CallbackIdResource
+
+
+

Callback IDs for resource domain, CUPTI_CB_DOMAIN_RESOURCE. This value is communicated to the callback function via the cbid parameter. +

+
+
+
+ Values + +
+
+
CUPTI_CBID_RESOURCE_INVALID = 0
+
Invalid resource callback ID.
+
CUPTI_CBID_RESOURCE_CONTEXT_CREATED = 1
+
A new context has been created.
+
CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING = 2
+
A context is about to be destroyed.
+
CUPTI_CBID_RESOURCE_STREAM_CREATED = 3
+
A new stream has been created.
+
CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING = 4
+
A stream is about to be destroyed.
+
CUPTI_CBID_RESOURCE_CU_INIT_FINISHED = 5
+
The driver has finished initializing.
+
CUPTI_CBID_RESOURCE_MODULE_LOADED = 6
+
A module has been loaded.
+
CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING = 7
+
A module is about to be unloaded.
+
CUPTI_CBID_RESOURCE_MODULE_PROFILED = 8
+
The current module which is being profiled.
+
CUPTI_CBID_RESOURCE_GRAPH_CREATED = 9
+
CUDA graph has been created.
+
CUPTI_CBID_RESOURCE_GRAPH_DESTROY_STARTING = 10
+
CUDA graph is about to be destroyed.
+
CUPTI_CBID_RESOURCE_GRAPH_CLONED = 11
+
CUDA graph is cloned.
+
CUPTI_CBID_RESOURCE_GRAPHNODE_CREATE_STARTING = 12
+
CUDA graph node is about to be created
+
CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED = 13
+
CUDA graph node is created.
+
CUPTI_CBID_RESOURCE_GRAPHNODE_DESTROY_STARTING = 14
+
CUDA graph node is about to be destroyed.
+
CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_CREATED = 15
+
Dependency on a CUDA graph node is created.
+
CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_DESTROY_STARTING = 16
+
Dependency on a CUDA graph node is destroyed.
+
CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATE_STARTING = 17
+
An executable CUDA graph is about to be created.
+
CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATED = 18
+
An executable CUDA graph is created.
+
CUPTI_CBID_RESOURCE_GRAPHEXEC_DESTROY_STARTING = 19
+
An executable CUDA graph is about to be destroyed.
+
CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED = 20
+
CUDA graph node is cloned.
+
CUPTI_CBID_RESOURCE_SIZE
+
+
CUPTI_CBID_RESOURCE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_CallbackIdSync
+
+
+

Callback IDs for synchronization domain, CUPTI_CB_DOMAIN_SYNCHRONIZE. This value is communicated to the callback function + via the cbid parameter. +

+
+
+
+ Values + +
+
+
CUPTI_CBID_SYNCHRONIZE_INVALID = 0
+
Invalid synchronize callback ID.
+
CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED = 1
+
Stream synchronization has completed for the stream.
+
CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED = 2
+
Context synchronization has completed for the context.
+
CUPTI_CBID_SYNCHRONIZE_SIZE
+
+
CUPTI_CBID_SYNCHRONIZE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiEnableAllDomains ( uint32_t enable, CUpti_SubscriberHandle subscriber )
+
+
Enable or disable all callbacks in all domains.
+
+
+ Parameters + +
+
+
enable
+
New enable state for all callbacks in all domains. Zero disables all callbacks, non-zero enables all callbacks.
+
subscriber
+
- Handle to callback subscription
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if subscriber is invalid +

    +
  • +
+

+
+
+
Description
+

Enable or disable all callbacks in all domains.

+

+

Note:

Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. + For example, if cuptiGetCallbackState(sub, d, *) and cuptiEnableAllDomains(sub) are called concurrently, the results are undefined. +

+
+

+

+
+
+
+ CUptiResult cuptiEnableCallback ( uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain, CUpti_CallbackId cbid )
+
+
Enable or disable callbacks for a specific domain and callback ID.
+
+
+ Parameters + +
+
+
enable
+
New enable state for the callback. Zero disables the callback, non-zero enables the callback.
+
subscriber
+
- Handle to callback subscription
+
domain
+
The domain of the callback
+
cbid
+
The ID of the callback
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if subscriber, domain or cbid is invalid. +

    +
  • +
+

+
+
+
Description
+

Enable or disable callbacks for a subscriber for a specific domain and callback ID.

+

+

Note:

Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. + For example, if cuptiGetCallbackState(sub, d, c) and cuptiEnableCallback(sub, d, c) are called concurrently, the results are + undefined. +

+
+

+

+
+
+
+ CUptiResult cuptiEnableDomain ( uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain )
+
+
Enable or disable all callbacks for a specific domain.
+
+
+ Parameters + +
+
+
enable
+
New enable state for all callbacks in the domain. Zero disables all callbacks, non-zero enables all callbacks.
+
subscriber
+
- Handle to callback subscription
+
domain
+
The domain of the callback
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if subscriber or domain is invalid +

    +
  • +
+

+
+
+
Description
+

Enable or disable all callbacks for a specific domain.

+

+

Note:

Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. + For example, if cuptiGetCallbackState(sub, d, *) and cuptiEnableDomain(sub, d) are called concurrently, the results are + undefined. +

+
+

+

+
+
+
+ CUptiResult cuptiGetCallbackName ( CUpti_CallbackDomain domain, uint32_t cbid, const char** name )
+
+
Get the name of a callback for a specific domain and callback ID.
+
+
+ Parameters + +
+
+
domain
+
The domain of the callback
+
cbid
+
The ID of the callback
+
name
+
Returns pointer to the name string on success, NULL otherwise
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if name is NULL, or if domain or cbid is invalid. +

    +
  • +
+

+
+
+
Description
+

Returns a pointer to the name c_string in **name. +

+

+

Note:

Names are available only for the DRIVER and RUNTIME domains. +

+
+

+

+
+
+
+ CUptiResult cuptiGetCallbackState ( uint32_t* enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain, CUpti_CallbackId cbid )
+
+
Get the current enabled/disabled state of a callback for a specific domain and function ID.
+
+
+ Parameters + +
+
+
enable
+
Returns non-zero if callback enabled, zero if not enabled
+
subscriber
+
Handle to the initialized subscriber
+
domain
+
The domain of the callback
+
cbid
+
The ID of the callback
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if enable is NULL, or if subscriber, domain or cbid is invalid. +

    +
  • +
+

+
+
+
Description
+

Returns non-zero in *enable if the callback for a domain and callback ID is enabled, and zero if not enabled. +

+

+

Note:

Thread-safety: a subscriber must serialize access to cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and cuptiEnableAllDomains. + For example, if cuptiGetCallbackState(sub, d, c) and cuptiEnableCallback(sub, d, c) are called concurrently, the results are + undefined. +

+
+

+

+
+
+
+ CUptiResult cuptiSubscribe ( CUpti_SubscriberHandle* subscriber, CUpti_CallbackFunc callback, void* userdata )
+
+
Initialize a callback subscriber with a callback function and user data.
+
+
+ Parameters + +
+
+
subscriber
+
Returns handle to initialized subscriber
+
callback
+
The callback function
+
userdata
+
A pointer to user data. This data will be passed to the callback function via the userdata parameter. +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED +

    if there is already a CUPTI subscriber

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if subscriber is NULL +

    +
  • +
+

+
+
+
Description
+

Initializes a callback subscriber with a callback function and (optionally) a pointer to user data. The returned subscriber + handle can be used to enable and disable the callback for specific domains and callback IDs. +

+

+

Note:
    +
  • +

    Only a single subscriber can be registered at a time. To ensure that no other CUPTI client interrupts the profiling session, + it's the responsibility of all the CUPTI clients to call this function before starting the profiling session. In case profiling + session is already started by another CUPTI client, this function returns the error code CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED. + Note that this function returns the same error when application is launched using NVIDIA tools like nvprof, Visual Profiler, + Nsight Systems, Nsight Compute, cuda-gdb and cuda-memcheck. +

    +
  • +
  • +

    This function does not enable any callbacks.

    +
  • +
  • +

    Thread-safety: this function is thread safe. +

    +
  • +
+
+

+

+
+
+
+ CUptiResult cuptiSupportedDomains ( size_t* domainCount, CUpti_DomainTable* domainTable )
+
+
Get the available callback domains.
+
+
+ Parameters + +
+
+
domainCount
+
Returns number of callback domains
+
domainTable
+
Returns pointer to array of available callback domains
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if domainCount or domainTable are NULL +

    +
  • +
+

+
+
+
Description
+

Returns in *domainTable an array of size *domainCount of all the available callback domains. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiUnsubscribe ( CUpti_SubscriberHandle subscriber )
+
+
Unregister a callback subscriber.
+
+
+ Parameters + +
+
+
subscriber
+
Handle to the initialized subscriber
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    on success

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if unable to initialize CUPTI

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if subscriber is NULL or not initialized +

    +
  • +
+

+
+
+
Description
+

Removes a callback subscriber so that no future callbacks will be issued to that subscriber.

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+
+
+
+

5.5. CUPTI Event API

+
+

Functions, types, and enums that implement the CUPTI Event API.

+

+

Note:

The CUPTI event APIs from the header cupti_events.h are not supported on devices with compute capability 7.5 and higher (i.e. Turing + and later GPU architectures). These APIs will be deprecated in a future CUDA release. They are replaced by the Profiling API in + the header cupti_profiler_target.h and the Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h, which are supported + on devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures). +

+
+

+

+
+

Classes

+
+
struct  +
+
A set of event groups.
+
struct  +
+
A set of event group sets.
+
+

Defines

+
+
#define CUPTI_EVENT_INVALID
+
The value that indicates the event value is invalid.
+
#define CUPTI_EVENT_OVERFLOW
+
The overflow value for a CUPTI event.
+
+

Typedefs

+
+
typedef uint32_t  CUpti_EventDomainID
+
ID for an event domain.
+
typedef void *  CUpti_EventGroup
+
A group of events.
+
typedef uint32_t  CUpti_EventID
+
ID for an event.
+
typedef + void +  ( *CUpti_KernelReplayUpdateFunc )( const char* +  kernelName,  int numReplaysDone, void* +  customData )
+
Function type for getting updates on kernel replay.
+
+

Enumerations

+
+
enum CUpti_DeviceAttribute
+
Device attributes.
+
enum CUpti_DeviceAttributeDeviceClass
+
Device class.
+
enum CUpti_EventAttribute
+
Event attributes.
+
enum CUpti_EventCategory
+
An event category.
+
enum CUpti_EventCollectionMethod
+
The collection method used for an event.
+
enum CUpti_EventCollectionMode
+
Event collection modes.
+
enum CUpti_EventDomainAttribute
+
Event domain attributes.
+
enum CUpti_EventGroupAttribute
+
Event group attributes.
+
enum CUpti_EventProfilingScope
+
Profiling scope for event.
+
enum CUpti_ReadEventFlags
+
Flags for cuptiEventGroupReadEvent and cuptiEventGroupReadAllEvents.
+
+

Functions

+
+
CUptiResult cuptiDeviceEnumEventDomains ( CUdevice device, size_t* arraySizeBytes, CUpti_EventDomainID* domainArray )
+
Get the event domains for a device.
+
CUptiResult cuptiDeviceGetAttribute ( CUdevice device, CUpti_DeviceAttribute attrib, size_t* valueSize, void* value )
+
Read a device attribute.
+
CUptiResult cuptiDeviceGetEventDomainAttribute ( CUdevice device, CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t* valueSize, void* value )
+
Read an event domain attribute.
+
CUptiResult cuptiDeviceGetNumEventDomains ( CUdevice device, uint32_t* numDomains )
+
Get the number of domains for a device.
+
CUptiResult cuptiDeviceGetTimestamp ( CUcontext context, uint64_t* timestamp )
+
Read a device timestamp.
+
CUptiResult cuptiDisableKernelReplayMode ( CUcontext context )
+
Disable kernel replay mode.
+
CUptiResult cuptiEnableKernelReplayMode ( CUcontext context )
+
Enable kernel replay mode.
+
CUptiResult cuptiEnumEventDomains ( size_t* arraySizeBytes, CUpti_EventDomainID* domainArray )
+
Get the event domains available on any device.
+
CUptiResult cuptiEventDomainEnumEvents ( CUpti_EventDomainID eventDomain, size_t* arraySizeBytes, CUpti_EventID* eventArray )
+
Get the events in a domain.
+
CUptiResult cuptiEventDomainGetAttribute ( CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t* valueSize, void* value )
+
Read an event domain attribute.
+
CUptiResult cuptiEventDomainGetNumEvents ( CUpti_EventDomainID eventDomain, uint32_t* numEvents )
+
Get number of events in a domain.
+
CUptiResult cuptiEventGetAttribute ( CUpti_EventID event, CUpti_EventAttribute attrib, size_t* valueSize, void* value )
+
Get an event attribute.
+
CUptiResult cuptiEventGetIdFromName ( CUdevice device, const char* eventName, CUpti_EventID* event )
+
Find an event by name.
+
CUptiResult cuptiEventGroupAddEvent ( CUpti_EventGroup eventGroup, CUpti_EventID event )
+
Add an event to an event group.
+
CUptiResult cuptiEventGroupCreate ( CUcontext context, CUpti_EventGroup* eventGroup, uint32_t flags )
+
Create a new event group for a context.
+
CUptiResult cuptiEventGroupDestroy ( CUpti_EventGroup eventGroup )
+
Destroy an event group.
+
CUptiResult cuptiEventGroupDisable ( CUpti_EventGroup eventGroup )
+
Disable an event group.
+
CUptiResult cuptiEventGroupEnable ( CUpti_EventGroup eventGroup )
+
Enable an event group.
+
CUptiResult cuptiEventGroupGetAttribute ( CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t* valueSize, void* value )
+
Read an event group attribute.
+
CUptiResult cuptiEventGroupReadAllEvents ( CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, size_t* eventValueBufferSizeBytes, uint64_t* eventValueBuffer, size_t* eventIdArraySizeBytes, CUpti_EventID* eventIdArray, size_t* numEventIdsRead )
+
Read the values for all the events in an event group.
+
CUptiResult cuptiEventGroupReadEvent ( CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, CUpti_EventID event, size_t* eventValueBufferSizeBytes, uint64_t* eventValueBuffer )
+
Read the value for an event in an event group.
+
CUptiResult cuptiEventGroupRemoveAllEvents ( CUpti_EventGroup eventGroup )
+
Remove all events from an event group.
+
CUptiResult cuptiEventGroupRemoveEvent ( CUpti_EventGroup eventGroup, CUpti_EventID event )
+
Remove an event from an event group.
+
CUptiResult cuptiEventGroupResetAllEvents ( CUpti_EventGroup eventGroup )
+
Zero all the event counts in an event group.
+
CUptiResult cuptiEventGroupSetAttribute ( CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t valueSize, void* value )
+
Write an event group attribute.
+
CUptiResult cuptiEventGroupSetDisable ( CUpti_EventGroupSet* eventGroupSet )
+
Disable an event group set.
+
CUptiResult cuptiEventGroupSetEnable ( CUpti_EventGroupSet* eventGroupSet )
+
Enable an event group set.
+
CUptiResult cuptiEventGroupSetsCreate ( CUcontext context, size_t eventIdArraySizeBytes, CUpti_EventID* eventIdArray, CUpti_EventGroupSets** eventGroupPasses )
+
For a set of events, get the grouping that indicates the number of passes and the event groups necessary to collect the events. +
+
CUptiResult cuptiEventGroupSetsDestroy ( CUpti_EventGroupSets* eventGroupSets )
+
Destroy an event group sets object.
+
CUptiResult cuptiGetNumEventDomains ( uint32_t* numDomains )
+
Get the number of event domains available on any device.
+
CUptiResult cuptiKernelReplaySubscribeUpdate ( CUpti_KernelReplayUpdateFunc updateFunc, void* customData )
+
Subscribe to kernel replay updates.
+
CUptiResult cuptiSetEventCollectionMode ( CUcontext context, CUpti_EventCollectionMode mode )
+
Set the event collection mode.
+
+
+

Defines

+
+
+ #define CUPTI_EVENT_INVALID
+
+
+

+
+
+
+ Value + +
+

((uint64_t)0xFFFFFFFFFFFFFFFEULL)

+
+
+
+ #define CUPTI_EVENT_OVERFLOW
+
+
+

The CUPTI event value that indicates an overflow.

+
+
+
+ Value + +
+

((uint64_t)0xFFFFFFFFFFFFFFFFULL)

+
+
+
+
+
+

Typedefs

+
+
+ typedef uint32_t CUpti_EventDomainID
+
+
+

ID for an event domain. ID for an event domain. An event domain represents a group of related events. A device may have multiple + instances of a domain, indicating that the device can simultaneously record multiple instances of each event within that domain. + +

+
+
+
+ typedef void * CUpti_EventGroup
+
+
+

A group of events. An event group is a collection of events that are managed together. All events in an event group must + belong to the same domain. +

+
+
+
+ typedef uint32_t CUpti_EventID
+
+
+

ID for an event. An event represents a countable activity, action, or occurrence on the device.

+
+
+
+ + void + ( *CUpti_KernelReplayUpdateFunc )( const char* +  kernelName,  int numReplaysDone, void* +  customData )
+
+
+

Function type for getting updates on kernel replay. + +

+
+
+
+
+ Parameters + +
+
+
kernelName
+
The mangled kernel name
+
int numReplaysDone
+
+
customData
+
Pointer of any custom data passed in when subscribing
+
+
+
+
+
+

Enumerations

+
+
+ enum CUpti_DeviceAttribute
+
+
+

CUPTI device attributes. These attributes can be read using cuptiDeviceGetAttribute. +

+
+
+
+ Values + +
+
+
CUPTI_DEVICE_ATTR_MAX_EVENT_ID = 1
+
Number of event IDs for a device. Value is a uint32_t.
+
CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID = 2
+
Number of event domain IDs for a device. Value is a uint32_t.
+
CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH = 3
+
Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE = 4
+
Get theoretical maximum number of instructions per cycle. Value is a uint32_t.
+
CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5
+
Get theoretical maximum number of single precision instructions that can be executed per second. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS = 6
+
Get number of frame buffers for device. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_PCIE_LINK_RATE = 7
+
Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type is non-PCIE. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH = 8
+
Get PCIE link width for device. Return 0 if bus-type is non-PCIE. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_PCIE_GEN = 9
+
Get PCIE generation for device. Return 0 if bus-type is non-PCIE. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS = 10
+
Get the class for the device. Value is a CUpti_DeviceAttributeDeviceClass.
+
CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE = 11
+
Get the peak single precision flop per cycle. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE = 12
+
Get the peak double precision flop per cycle. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_MAX_L2_UNITS = 13
+
Get number of L2 units. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14
+
Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED preference. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15
+
Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1 preference. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16
+
Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL preference. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE = 17
+
Get the peak half precision flop per cycle. Value is a uint64_t.
+
CUPTI_DEVICE_ATTR_NVLINK_PRESENT = 18
+
Check if Nvlink is connected to device. Returns 1, if at least one Nvlink is connected to the device, returns 0 otherwise. + Value is a uint32_t. +
+
CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW = 19
+
Check if Nvlink is present between GPU and CPU. Returns Bandwidth, in Bytes/sec, if Nvlink is present, returns 0 otherwise. + Value is a uint64_t. +
+
CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT = 20
+
Check if NVSwitch is present in the underlying topology. Returns 1, if present, returns 0 otherwise. Value is a uint32_t. + +
+
CUPTI_DEVICE_ATTR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_DeviceAttributeDeviceClass
+
+
+

Enumeration of device classes for device attribute CUPTI_DEVICE_ATTR_DEVICE_CLASS.

+
+
+
+ Values + +
+
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_TESLA = 0
+
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_QUADRO = 1
+
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_GEFORCE = 2
+
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_TEGRA = 3
+
+
+
+
+
+ enum CUpti_EventAttribute
+
+
+

Event attributes. These attributes can be read using cuptiEventGetAttribute. +

+
+
+
+ Values + +
+
+
CUPTI_EVENT_ATTR_NAME = 0
+
Event name. Value is a null terminated const c-string.
+
CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1
+
Short description of event. Value is a null terminated const c-string.
+
CUPTI_EVENT_ATTR_LONG_DESCRIPTION = 2
+
Long description of event. Value is a null terminated const c-string.
+
CUPTI_EVENT_ATTR_CATEGORY = 3
+
Category of event. Value is CUpti_EventCategory.
+
CUPTI_EVENT_ATTR_PROFILING_SCOPE = 5
+
Profiling scope of the events. It can be either device or context or both. Value is a CUpti_EventProfilingScope. +
+
CUPTI_EVENT_ATTR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EventCategory
+
+
+

Each event is assigned to a category that represents the general type of the event. An event's category is accessed using cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute. +

+
+
+
+ Values + +
+
+
CUPTI_EVENT_CATEGORY_INSTRUCTION = 0
+
An instruction related event.
+
CUPTI_EVENT_CATEGORY_MEMORY = 1
+
A memory related event.
+
CUPTI_EVENT_CATEGORY_CACHE = 2
+
A cache related event.
+
CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3
+
A profile-trigger event.
+
CUPTI_EVENT_CATEGORY_SYSTEM = 4
+
A system event.
+
CUPTI_EVENT_CATEGORY_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EventCollectionMethod
+
+
+

The collection method indicates how an event is collected.

+
+
+
+ Values + +
+
+
CUPTI_EVENT_COLLECTION_METHOD_PM = 0
+
Event is collected using a hardware global performance monitor.
+
CUPTI_EVENT_COLLECTION_METHOD_SM = 1
+
Event is collected using a hardware SM performance monitor.
+
CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED = 2
+
Event is collected using software instrumentation.
+
CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC = 3
+
Event is collected using NvLink throughput counter method.
+
CUPTI_EVENT_COLLECTION_METHOD_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EventCollectionMode
+
+
+

The event collection mode determines the period over which the events within the enabled event groups will be collected.

+
+
+
+ Values + +
+
+
CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS = 0
+
Events are collected for the entire duration between the cuptiEventGroupEnable and cuptiEventGroupDisable calls. Event values + are reset when the events are read. For CUDA toolkit v6.0 and older this was the default mode. +
+
CUPTI_EVENT_COLLECTION_MODE_KERNEL = 1
+
Events are collected only for the durations of kernel executions that occur between the cuptiEventGroupEnable and cuptiEventGroupDisable + calls. Event collection begins when a kernel execution begins, and stops when kernel execution completes. Event values are + reset to zero when each kernel execution begins. If multiple kernel executions occur between the cuptiEventGroupEnable and + cuptiEventGroupDisable calls then the event values must be read after each kernel launch if those events need to be associated + with the specific kernel launch. Note that collection in this mode may significantly change the overall performance characteristics + of the application because kernel executions that occur between the cuptiEventGroupEnable and cuptiEventGroupDisable calls + are serialized on the GPU. This is the default mode from CUDA toolkit v6.5 +
+
CUPTI_EVENT_COLLECTION_MODE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EventDomainAttribute
+
+
+

Event domain attributes. Except where noted, all the attributes can be read using either cuptiDeviceGetEventDomainAttribute or cuptiEventDomainGetAttribute. +

+
+
+
+ Values + +
+
+
CUPTI_EVENT_DOMAIN_ATTR_NAME = 0
+
Event domain name. Value is a null terminated const c-string.
+
CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT = 1
+
Number of instances of the domain for which event counts will be collected. The domain may have additional instances that + cannot be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT). Can be read only with cuptiDeviceGetEventDomainAttribute. Value is a uint32_t. +
+
CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3
+
Total number of instances of the domain, including instances that cannot be profiled. Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT + to get the number of instances that can be profiled. Can be read only with cuptiDeviceGetEventDomainAttribute. Value is a uint32_t. +
+
CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD = 4
+
Collection method used for events contained in the event domain. Value is a CUpti_EventCollectionMethod. +
+
CUPTI_EVENT_DOMAIN_ATTR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EventGroupAttribute
+
+
+

Event group attributes. These attributes can be read using cuptiEventGroupGetAttribute. Attributes marked [rw] can also be written using cuptiEventGroupSetAttribute. +

+
+
+
+ Values + +
+
+
CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID = 0
+
The domain to which the event group is bound. This attribute is set when the first event is added to the group. Value is a + CUpti_EventDomainID. +
+
CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1
+
[rw] Profile all the instances of the domain for this eventgroup. This feature can be used to get load balancing across all + instances of a domain. Value is an integer. +
+
CUPTI_EVENT_GROUP_ATTR_USER_DATA = 2
+
[rw] Reserved for user data.
+
CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS = 3
+
Number of events in the group. Value is a uint32_t.
+
CUPTI_EVENT_GROUP_ATTR_EVENTS = 4
+
Enumerates events in the group. Value is a pointer to buffer of size sizeof(CUpti_EventID) * num_of_events in the eventgroup. + num_of_events can be queried using CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS. +
+
CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT = 5
+
Number of instances of the domain bound to this event group that will be counted. Value is a uint32_t.
+
CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE = 6
+
Event group scope can be set to CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT for an eventGroup, + before adding any event. Sets the scope of eventgroup as CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT + when the scope of the events that will be added is CUPTI_EVENT_PROFILING_SCOPE_BOTH. If profiling scope of event is either + CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT then setting this attribute will not affect the + default scope. It is not allowed to add events of different scope to same eventgroup. Value is a uint32_t. +
+
CUPTI_EVENT_GROUP_ATTR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_EventProfilingScope
+
+
+

Profiling scope of event indicates if the event can be collected at context scope or device scope or both i.e. it can be collected + at any of context or device scope. +

+
+
+
+ Values + +
+
+
CUPTI_EVENT_PROFILING_SCOPE_CONTEXT = 0
+
Event is collected at context scope.
+
CUPTI_EVENT_PROFILING_SCOPE_DEVICE = 1
+
Event is collected at device scope.
+
CUPTI_EVENT_PROFILING_SCOPE_BOTH = 2
+
Event can be collected at device or context scope. The scope can be set using cuptiEventGroupSetAttribute API. +
+
CUPTI_EVENT_PROFILING_SCOPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_ReadEventFlags
+
+ +
+
+ Values + +
+
+
CUPTI_EVENT_READ_FLAG_NONE = 0
+
No flags.
+
CUPTI_EVENT_READ_FLAG_FORCE_INT = 0x7fffffff
+
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiDeviceEnumEventDomains ( CUdevice device, size_t* arraySizeBytes, CUpti_EventDomainID* domainArray )
+
+
Get the event domains for a device.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
arraySizeBytes
+
The size of domainArray in bytes, and returns the number of bytes written to domainArray
+
domainArray
+
Returns the IDs of the event domains for the device
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if arraySizeBytes or domainArray are NULL +

    +
  • +
+

+
+
+
Description
+

Returns the event domain IDs in domainArray for a device. The size of the domainArray buffer is given by *arraySizeBytes. The size of the domainArray buffer must be at least numdomains * sizeof(CUpti_EventDomainID) or else not all domains will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in domainArray. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiDeviceGetAttribute ( CUdevice device, CUpti_DeviceAttribute attrib, size_t* valueSize, void* value )
+
+
Read a device attribute.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
attrib
+
The attribute to read
+
valueSize
+
Size of the buffer pointed to by value, and returns the number of bytes written to value
+
value
+
Returns the value of the attribute
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not a device attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Read a device attribute and return it in *value. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiDeviceGetEventDomainAttribute ( CUdevice device, CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t* valueSize, void* value )
+
+
Read an event domain attribute.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
eventDomain
+
ID of the event domain
+
attrib
+
The event domain attribute to read
+
valueSize
+
The size of the value buffer in bytes, and returns the number of bytes written to value
+
value
+
Returns the attribute's value
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not an event domain attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Returns an event domain attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value. +

+

If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiDeviceGetNumEventDomains ( CUdevice device, uint32_t* numDomains )
+
+
Get the number of domains for a device.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
numDomains
+
Returns the number of domains
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numDomains is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the number of domains in numDomains for a device. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiDeviceGetTimestamp ( CUcontext context, uint64_t* timestamp )
+
+
Read a device timestamp.
+
+
+ Parameters + +
+
+
context
+
A context on the device from which to get the timestamp
+
timestamp
+
Returns the device timestamp
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if timestamp is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the device timestamp in *timestamp. The timestamp is reported in nanoseconds and indicates the time since the device was last reset. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+ + + **DEPRECATED** This API is deprecated as of CUDA 11.3 +

+
+
+
+ CUptiResult cuptiDisableKernelReplayMode ( CUcontext context )
+
+
Disable kernel replay mode.
+
+
+ Parameters + +
+
+
context
+
The context
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
+

+
+
+
Description
+

Set profiling mode for the context to non-replay (default) mode. Event collection mode will be set to CUPTI_EVENT_COLLECTION_MODE_KERNEL. + All previously enabled event groups and event group sets will be disabled. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEnableKernelReplayMode ( CUcontext context )
+
+
Enable kernel replay mode.
+
+
+ Parameters + +
+
+
context
+
The context
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
+

+
+
+
Description
+

Set profiling mode for the context to replay mode. In this mode, any number of events can be collected in one run of the kernel. + The event collection mode will automatically switch to CUPTI_EVENT_COLLECTION_MODE_KERNEL. In this mode, cuptiSetEventCollectionMode will return CUPTI_ERROR_INVALID_OPERATION. +

+

+

Note:
    +
  • +

    Kernels might take longer to run if many events are enabled. +

    +
  • +
  • +

    Thread-safety: this function is thread safe. +

    +
  • +
+
+

+

+
+
+
+ CUptiResult cuptiEnumEventDomains ( size_t* arraySizeBytes, CUpti_EventDomainID* domainArray )
+
+
Get the event domains available on any device.
+
+
+ Parameters + +
+
+
arraySizeBytes
+
The size of domainArray in bytes, and returns the number of bytes written to domainArray
+
domainArray
+
Returns all the event domains
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if arraySizeBytes or domainArray are NULL +

    +
  • +
+

+
+
+
Description
+

Returns all the event domains available on any CUDA-capable device. Event domain IDs are returned in domainArray. The size of the domainArray buffer is given by *arraySizeBytes. The size of the domainArray buffer must be at least numDomains * sizeof(CUpti_EventDomainID) or not all domains will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in domainArray. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventDomainEnumEvents ( CUpti_EventDomainID eventDomain, size_t* arraySizeBytes, CUpti_EventID* eventArray )
+
+
Get the events in a domain.
+
+
+ Parameters + +
+
+
eventDomain
+
ID of the event domain
+
arraySizeBytes
+
The size of eventArray in bytes, and returns the number of bytes written to eventArray
+
eventArray
+
Returns the IDs of the events in the domain
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if arraySizeBytes or eventArray are NULL +

    +
  • +
+

+
+
+
Description
+

Returns the event IDs in eventArray for a domain. The size of the eventArray buffer is given by *arraySizeBytes. The size of the eventArray buffer must be at least numdomainevents * sizeof(CUpti_EventID) or else not all events will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in eventArray. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventDomainGetAttribute ( CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t* valueSize, void* value )
+
+
Read an event domain attribute.
+
+
+ Parameters + +
+
+
eventDomain
+
ID of the event domain
+
attrib
+
The event domain attribute to read
+
valueSize
+
The size of the value buffer in bytes, and returns the number of bytes written to value
+
value
+
Returns the attribute's value
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not an event domain attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Returns an event domain attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value. +

+

If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventDomainGetNumEvents ( CUpti_EventDomainID eventDomain, uint32_t* numEvents )
+
+
Get number of events in a domain.
+
+
+ Parameters + +
+
+
eventDomain
+
ID of the event domain
+
numEvents
+
Returns the number of events in the domain
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numEvents is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the number of events in numEvents for a domain. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGetAttribute ( CUpti_EventID event, CUpti_EventAttribute attrib, size_t* valueSize, void* value )
+
+
Get an event attribute.
+
+
+ Parameters + +
+
+
event
+
ID of the event
+
attrib
+
The event attribute to read
+
valueSize
+
The size of the value buffer in bytes, and returns the number of bytes written to value
+
value
+
Returns the attribute's value
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not an event attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Returns an event attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value. +

+

If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGetIdFromName ( CUdevice device, const char* eventName, CUpti_EventID* event )
+
+
Find an event by name.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
eventName
+
The name of the event to find
+
event
+
Returns the ID of the found event or undefined if unable to find the event
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_NAME +

    if unable to find an event with name eventName. In this case *event is undefined +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventName or event are NULL +

    +
  • +
+

+
+
+
Description
+

Find an event by name and return the event ID in *event. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupAddEvent ( CUpti_EventGroup eventGroup, CUpti_EventID event )
+
+
Add an event to an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
event
+
The event to add to the group
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_ID +

    +
  • +
  • CUPTI_ERROR_OUT_OF_MEMORY +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if eventGroup is enabled +

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if event belongs to a different event domain than the events already in eventGroup, or if a device limitation prevents event from being collected at the same time as the events already in eventGroup

    +
  • +
  • CUPTI_ERROR_MAX_LIMIT_REACHED +

    if eventGroup is full +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
+

+
+
+
Description
+

Add an event to an event group. The event add can fail for a number of reasons: +

    +
  • +

    The event group is enabled

    +
  • +
  • +

    The event does not belong to the same event domain as the events that are already in the event group

    +
  • +
  • +

    Device limitations on the events that can belong to the same group

    +
  • +
  • +

    The event group is full

    +
  • +
+

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupCreate ( CUcontext context, CUpti_EventGroup* eventGroup, uint32_t flags )
+
+
Create a new event group for a context.
+
+
+ Parameters + +
+
+
context
+
The context for the event group
+
eventGroup
+
Returns the new event group
+
flags
+
Reserved - must be zero
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_OUT_OF_MEMORY +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
+

+
+
+
Description
+

Creates a new event group for context and returns the new group in *eventGroup. +

+

+

Note:
    +
  • +

    flags are reserved for future use and should be set to zero. +

    +
  • +
  • +

    Thread-safety: this function is thread safe. +

    +
  • +
+
+

+

+
+
+
+ CUptiResult cuptiEventGroupDestroy ( CUpti_EventGroup eventGroup )
+
+
Destroy an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group to destroy
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if the event group is enabled

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL

    +
  • +
+

+
+
+
Description
+

Destroy an eventGroup and free its resources. An event group cannot be destroyed if it is enabled. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupDisable ( CUpti_EventGroup eventGroup )
+
+
Disable an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
+

+
+
+
Description
+

Disable an event group. Disabling an event group stops collection of events contained in the group.

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupEnable ( CUpti_EventGroup eventGroup )
+
+
Enable an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_NOT_READY +

    if eventGroup does not contain any events +

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if eventGroup cannot be enabled due to other already enabled event groups +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
  • CUPTI_ERROR_HARDWARE_BUSY +

    if another client is profiling and hardware is busy

    +
  • +
+

+
+
+
Description
+

Enable an event group. Enabling an event group zeros the value of all the events in the group and then starts collection of + those events. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupGetAttribute ( CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t* valueSize, void* value )
+
+
Read an event group attribute.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
attrib
+
The attribute to read
+
valueSize
+
Size of the buffer pointed to by value, and returns the number of bytes written to value
+
value
+
Returns the value of the attribute
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not an eventgroup attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Read an event group attribute and return it in *value. +

+

+

Note:

Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.). +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupReadAllEvents ( CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, size_t* eventValueBufferSizeBytes, uint64_t* eventValueBuffer, size_t* eventIdArraySizeBytes, CUpti_EventID* eventIdArray, size_t* numEventIdsRead )
+
+
Read the values for all the events in an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
flags
+
Flags controlling the reading mode
+
eventValueBufferSizeBytes
+
The size of eventValueBuffer in bytes, and returns the number of bytes written to eventValueBuffer
+
eventValueBuffer
+
Returns the event values
+
eventIdArraySizeBytes
+
The size of eventIdArray in bytes, and returns the number of bytes written to eventIdArray
+
eventIdArray
+
Returns the IDs of the events in the same order as the values returned in eventValueBuffer.
+
numEventIdsRead
+
Returns the number of event IDs returned in eventIdArray
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if eventGroup is disabled +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup, eventValueBufferSizeBytes, eventValueBuffer, eventIdArraySizeBytes, eventIdArray or numEventIdsRead is NULL +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    if size of eventValueBuffer or eventIdArray is not sufficient +

    +
  • +
+

+
+
+
Description
+

Read the values for all the events in an event group. The event values are returned in the eventValueBuffer buffer. eventValueBufferSizeBytes indicates the size of eventValueBuffer. The buffer must be at least (sizeof(uint64) * number of events in group) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on the group containing the events. The buffer must be at least (sizeof(uint64) * number of domain instances * + number of events in group) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the group. +

+

The data format returned in eventValueBuffer is: +

    +
  • +

    domain instance 0: event0 event1 ... eventN

    +
  • +
  • +

    domain instance 1: event0 event1 ... eventN

    +
  • +
  • +

    ...

    +
  • +
  • +

    domain instance M: event0 event1 ... eventN

    +
  • +
+

+

The event order in eventValueBuffer is returned in eventIdArray. The size of eventIdArray is specified in eventIdArraySizeBytes. The size should be at least (sizeof(CUpti_EventID) * number of events in group). +

+

If any instance of any event counter overflows, the value returned for that event instance will be CUPTI_EVENT_OVERFLOW. +

+

The only allowed value for flags is CUPTI_EVENT_READ_FLAG_NONE. +

+

Reading events from a disabled event group is not allowed. After being read, an event's value is reset to zero.

+

+

Note:

Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.). If cuptiEventGroupResetAllEvents is called simultaneously with this function, then returned event values are undefined. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupReadEvent ( CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, CUpti_EventID event, size_t* eventValueBufferSizeBytes, uint64_t* eventValueBuffer )
+
+
Read the value for an event in an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
flags
+
Flags controlling the reading mode
+
event
+
The event to read
+
eventValueBufferSizeBytes
+
The size of eventValueBuffer in bytes, and returns the number of bytes written to eventValueBuffer
+
eventValueBuffer
+
Returns the event value(s)
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_ID +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if eventGroup is disabled +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup, eventValueBufferSizeBytes or eventValueBuffer is NULL +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    if size of eventValueBuffer is not sufficient +

    +
  • +
+

+
+
+
Description
+

Read the value for an event in an event group. The event value is returned in the eventValueBuffer buffer. eventValueBufferSizeBytes indicates the size of the eventValueBuffer buffer. The buffer must be at least sizeof(uint64) if CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on the group containing the event. The buffer must be at least (sizeof(uint64) * number of domain instances) if + CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the group. +

+

If any instance of an event counter overflows, the value returned for that event instance will be CUPTI_EVENT_OVERFLOW. +

+

The only allowed value for flags is CUPTI_EVENT_READ_FLAG_NONE. +

+

Reading an event from a disabled event group is not allowed. After being read, an event's value is reset to zero.

+

+

Note:

Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.). If cuptiEventGroupResetAllEvents is called simultaneously with this function, then returned event values are undefined. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupRemoveAllEvents ( CUpti_EventGroup eventGroup )
+
+
Remove all events from an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if eventGroup is enabled +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
+

+
+
+
Description
+

Remove all events from an event group. Events cannot be removed if the event group is enabled.

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupRemoveEvent ( CUpti_EventGroup eventGroup, CUpti_EventID event )
+
+
Remove an event from an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
event
+
The event to remove from the group
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if eventGroup is enabled +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
+

+
+
+
Description
+

Remove an event from an event group. The event cannot be removed if the event group is enabled. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupResetAllEvents ( CUpti_EventGroup eventGroup )
+
+
Zero all the event counts in an event group.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroup is NULL +

    +
  • +
+

+
+
+
Description
+

Zero all the event counts in an event group.

+

+

Note:

Thread-safety: this function is thread safe but client must guard against simultaneous destruction or modification of eventGroup (for example, client must guard against simultaneous calls to cuptiEventGroupDestroy, cuptiEventGroupAddEvent, etc.), and must guard against simultaneous destruction of the context in which eventGroup was created (for example, client must guard against simultaneous calls to cudaDeviceReset, cuCtxDestroy, etc.). +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupSetAttribute ( CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t valueSize, void* value )
+
+
Write an event group attribute.
+
+
+ Parameters + +
+
+
eventGroup
+
The event group
+
attrib
+
The attribute to write
+
valueSize
+
The size, in bytes, of the value
+
value
+
The attribute value to write
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not an event group attribute, or if attrib is not a writable attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    Indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Write an event group attribute.

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupSetDisable ( CUpti_EventGroupSet* eventGroupSet )
+
+
Disable an event group set.
+
+
+ Parameters + +
+
+
eventGroupSet
+
The pointer to the event group set
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroupSet is NULL +

    +
  • +
+

+
+
+
Description
+

Disable a set of event groups. Disabling a set of event groups stops collection of events contained in the groups.

+

+

Note:
    +
  • +

    Thread-safety: this function is thread safe. +

    +
  • +
  • +

    If this call fails, some of the event groups in the set may be disabled and other event groups may remain enabled. +

    +
  • +
+
+

+

+
+
+
+ CUptiResult cuptiEventGroupSetEnable ( CUpti_EventGroupSet* eventGroupSet )
+
+
Enable an event group set.
+
+
+ Parameters + +
+
+
eventGroupSet
+
The pointer to the event group set
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_HARDWARE +

    +
  • +
  • CUPTI_ERROR_NOT_READY +

    if eventGroup does not contain any events +

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if eventGroup cannot be enabled due to other already enabled event groups +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroupSet is NULL +

    +
  • +
  • CUPTI_ERROR_HARDWARE_BUSY +

    if other client is profiling and hardware is busy

    +
  • +
+

+
+
+
Description
+

Enable a set of event groups. Enabling a set of event groups zeros the value of all the events in all the groups and then + starts collection of those events. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupSetsCreate ( CUcontext context, size_t eventIdArraySizeBytes, CUpti_EventID* eventIdArray, CUpti_EventGroupSets** eventGroupPasses )
+
+
For a set of events, get the grouping that indicates the number of passes and the event groups necessary to collect the events. + +
+
+
+ Parameters + +
+
+
context
+
The context for event collection
+
eventIdArraySizeBytes
+
Size of eventIdArray in bytes +
+
eventIdArray
+
Array of event IDs that need to be grouped
+
eventGroupPasses
+
Returns a CUpti_EventGroupSets object that indicates the number of passes required to collect the events and the events to collect on each pass +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventIdArray or eventGroupPasses is NULL +

    +
  • +
+

+
+
+
Description
+

The number of events that can be collected simultaneously varies by device and by the type of the events. When events can + be collected simultaneously, they may need to be grouped into multiple event groups because they are from different event + domains. This function takes a set of events and determines how many passes are required to collect all those events, and + which events can be collected simultaneously in each pass. +

+

The CUpti_EventGroupSets returned in eventGroupPasses indicates how many passes are required to collect the events with the numSets field. Within each event group set, the sets array indicates the event groups that should be collected on each pass. +

+

+

Note:

Thread-safety: this function is thread safe, but client must guard against another thread simultaneously destroying context. +

+
+

+

+
+
+
+ CUptiResult cuptiEventGroupSetsDestroy ( CUpti_EventGroupSets* eventGroupSets )
+
+
Destroy an event group sets object.
+
+
+ Parameters + +
+
+
eventGroupSets
+
The object to destroy
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if any of the event groups contained in the sets is enabled

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventGroupSets is NULL +

    +
  • +
+

+
+
+
Description
+

Destroy a CUpti_EventGroupSets object. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiGetNumEventDomains ( uint32_t* numDomains )
+
+
Get the number of event domains available on any device.
+
+
+ Parameters + +
+
+
numDomains
+
Returns the number of domains
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numDomains is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the total number of event domains available on any CUDA-capable device.

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+ CUptiResult cuptiKernelReplaySubscribeUpdate ( CUpti_KernelReplayUpdateFunc updateFunc, void* customData )
+
+
Subscribe to kernel replay updates.
+
+
+ Parameters + +
+
+
updateFunc
+
The update function pointer
+
customData
+
Pointer to any custom data
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
+

+
+
+
Description
+

When subscribed, the function pointer passed in will be called each time a kernel run is finished during kernel replay. Any previously + subscribed function pointer will be replaced. Passing in NULL as the function pointer unsubscribes the update. +

+

+
+
+
+ CUptiResult cuptiSetEventCollectionMode ( CUcontext context, CUpti_EventCollectionMode mode )
+
+
Set the event collection mode.
+
+
+ Parameters + +
+
+
context
+
The context
+
mode
+
The event collection mode
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if called when replay mode is enabled

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    if mode is not supported on the device

    +
  • +
+

+
+
+
Description
+

Set the event collection mode for a context. The mode controls the event collection behavior of all events in event groups created in the context. This API is invalid in kernel replay mode. +

+

+

Note:

Thread-safety: this function is thread safe. +

+
+

+

+
+
+
+
+
+
+

5.6. CUPTI Metric API

+
+

Functions, types, and enums that implement the CUPTI Metric API.

+

+

Note:

The CUPTI metric APIs from the header cupti_metrics.h are not supported on devices with compute capability 7.5 and higher (i.e. + Turing and later GPU architectures). These APIs will be deprecated in a future CUDA release. They are replaced by the Profiling + API in the header cupti_profiler_target.h and the Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h, which + are supported on devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures). +

+
+

+

+
+

Classes

+
+
union  +
+
A metric value.
+
+

Typedefs

+
+
typedef uint32_t  CUpti_MetricID
+
ID for a metric.
+
+

Enumerations

+
+
enum CUpti_MetricAttribute
+
Metric attributes.
+
enum CUpti_MetricCategory
+
A metric category.
+
enum CUpti_MetricEvaluationMode
+
A metric evaluation mode.
+
enum CUpti_MetricPropertyDeviceClass
+
Device class.
+
enum CUpti_MetricPropertyID
+
Metric device properties.
+
enum CUpti_MetricValueKind
+
Kinds of metric values.
+
enum CUpti_MetricValueUtilizationLevel
+
Enumeration of utilization levels for metrics values of kind CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL. Utilization values + can vary from IDLE (0) to MAX (10) but the enumeration only provides specific names for a few values.
+
+

Functions

+
+
CUptiResult cuptiDeviceEnumMetrics ( CUdevice device, size_t* arraySizeBytes, CUpti_MetricID* metricArray )
+
Get the metrics for a device.
+
CUptiResult cuptiDeviceGetNumMetrics ( CUdevice device, uint32_t* numMetrics )
+
Get the number of metrics for a device.
+
CUptiResult cuptiEnumMetrics ( size_t* arraySizeBytes, CUpti_MetricID* metricArray )
+
Get all the metrics available on any device.
+
CUptiResult cuptiGetNumMetrics ( uint32_t* numMetrics )
+
Get the total number of metrics available on any device.
+
CUptiResult cuptiMetricCreateEventGroupSets ( CUcontext context, size_t metricIdArraySizeBytes, CUpti_MetricID* metricIdArray, CUpti_EventGroupSets** eventGroupPasses )
+
For a set of metrics, get the grouping that indicates the number of passes and the event groups necessary to collect the events + required for those metrics.
+
CUptiResult cuptiMetricEnumEvents ( CUpti_MetricID metric, size_t* eventIdArraySizeBytes, CUpti_EventID* eventIdArray )
+
Get the events required to calculate a metric.
+
CUptiResult cuptiMetricEnumProperties ( CUpti_MetricID metric, size_t* propIdArraySizeBytes, CUpti_MetricPropertyID* propIdArray )
+
Get the properties required to calculate a metric.
+
CUptiResult cuptiMetricGetAttribute ( CUpti_MetricID metric, CUpti_MetricAttribute attrib, size_t* valueSize, void* value )
+
Get a metric attribute.
+
CUptiResult cuptiMetricGetIdFromName ( CUdevice device, const char* metricName, CUpti_MetricID* metric )
+
Find a metric by name.
+
CUptiResult cuptiMetricGetNumEvents ( CUpti_MetricID metric, uint32_t* numEvents )
+
Get number of events required to calculate a metric.
+
CUptiResult cuptiMetricGetNumProperties ( CUpti_MetricID metric, uint32_t* numProp )
+
Get number of properties required to calculate a metric.
+
CUptiResult cuptiMetricGetRequiredEventGroupSets ( CUcontext context, CUpti_MetricID metric, CUpti_EventGroupSets** eventGroupSets )
+
For a metric get the groups of events that must be collected in the same pass.
+
CUptiResult cuptiMetricGetValue ( CUdevice device, CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID* eventIdArray, size_t eventValueArraySizeBytes, uint64_t* eventValueArray, uint64_t timeDuration, CUpti_MetricValue* metricValue )
+
Calculate the value for a metric.
+
CUptiResult cuptiMetricGetValue2 ( CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID* eventIdArray, size_t eventValueArraySizeBytes, uint64_t* eventValueArray, size_t propIdArraySizeBytes, CUpti_MetricPropertyID* propIdArray, size_t propValueArraySizeBytes, uint64_t* propValueArray, CUpti_MetricValue* metricValue )
+
Calculate the value for a metric.
+
+
+

Typedefs

+
+
+ typedef uint32_t CUpti_MetricID
+
+
+

ID for a metric. A metric provides a measure of some aspect of the device.

+
+
+
+
+
+

Enumerations

+
+
+ enum CUpti_MetricAttribute
+
+
+

Metric attributes describe properties of a metric. These attributes can be read using cuptiMetricGetAttribute. +

+
+
+
+ Values + +
+
+
CUPTI_METRIC_ATTR_NAME = 0
+
Metric name. Value is a null terminated const c-string.
+
CUPTI_METRIC_ATTR_SHORT_DESCRIPTION = 1
+
Short description of metric. Value is a null terminated const c-string.
+
CUPTI_METRIC_ATTR_LONG_DESCRIPTION = 2
+
Long description of metric. Value is a null terminated const c-string.
+
CUPTI_METRIC_ATTR_CATEGORY = 3
+
Category of the metric. Value is of type CUpti_MetricCategory.
+
CUPTI_METRIC_ATTR_VALUE_KIND = 4
+
Value type of the metric. Value is of type CUpti_MetricValueKind.
+
CUPTI_METRIC_ATTR_EVALUATION_MODE = 5
+
Metric evaluation mode. Value is of type CUpti_MetricEvaluationMode.
+
CUPTI_METRIC_ATTR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_MetricCategory
+
+
+

Each metric is assigned to a category that represents the general type of the metric. A metric's category is accessed using + cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_CATEGORY attribute. +

+
+
+
+ Values + +
+
+
CUPTI_METRIC_CATEGORY_MEMORY = 0
+
A memory related metric.
+
CUPTI_METRIC_CATEGORY_INSTRUCTION = 1
+
An instruction related metric.
+
CUPTI_METRIC_CATEGORY_MULTIPROCESSOR = 2
+
A multiprocessor related metric.
+
CUPTI_METRIC_CATEGORY_CACHE = 3
+
A cache related metric.
+
CUPTI_METRIC_CATEGORY_TEXTURE = 4
+
A texture related metric.
+
CUPTI_METRIC_CATEGORY_NVLINK = 5
+
An NVLink related metric.
+
CUPTI_METRIC_CATEGORY_PCIE = 6
+
A PCIe related metric.
+
CUPTI_METRIC_CATEGORY_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_MetricEvaluationMode
+
+
+

A metric can be evaluated per hardware instance to know the load balancing across instances of a domain or the metric can + be evaluated in aggregate mode when the events involved in metric evaluation are from different event domains. It might be + possible to evaluate some metrics in both modes for convenience. A metric's evaluation mode is accessed using cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_EVALUATION_MODE attribute; the returned value is of type CUpti_MetricEvaluationMode. +

+
+
+
+ Values + +
+
+
CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE = 1
+
If this bit is set, the metric can be profiled for each instance of the domain. The event values passed to cuptiMetricGetValue can contain values for one instance of the domain. And cuptiMetricGetValue can be called for each instance. +
+
CUPTI_METRIC_EVALUATION_MODE_AGGREGATE = 1<<1
+
If this bit is set, the metric can be profiled over all instances. The event values passed to cuptiMetricGetValue can be aggregated values of events for all instances of the domain. +
+
CUPTI_METRIC_EVALUATION_MODE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_MetricPropertyDeviceClass
+
+
+

Enumeration of device classes for metric property CUPTI_METRIC_PROPERTY_DEVICE_CLASS.

+
+
+
+ Values + +
+
+
CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TESLA = 0
+
+
CUPTI_METRIC_PROPERTY_DEVICE_CLASS_QUADRO = 1
+
+
CUPTI_METRIC_PROPERTY_DEVICE_CLASS_GEFORCE = 2
+
+
CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TEGRA = 3
+
+
+
+
+
+ enum CUpti_MetricPropertyID
+
+
+

Metric device properties describe device properties which are needed for a metric. Some of these properties can be collected + using cuDeviceGetAttribute. +

+
+
+
+ Values + +
+
+
CUPTI_METRIC_PROPERTY_MULTIPROCESSOR_COUNT
+
+
CUPTI_METRIC_PROPERTY_WARPS_PER_MULTIPROCESSOR
+
+
CUPTI_METRIC_PROPERTY_KERNEL_GPU_TIME
+
+
CUPTI_METRIC_PROPERTY_CLOCK_RATE
+
+
CUPTI_METRIC_PROPERTY_FRAME_BUFFER_COUNT
+
+
CUPTI_METRIC_PROPERTY_GLOBAL_MEMORY_BANDWIDTH
+
+
CUPTI_METRIC_PROPERTY_PCIE_LINK_RATE
+
+
CUPTI_METRIC_PROPERTY_PCIE_LINK_WIDTH
+
+
CUPTI_METRIC_PROPERTY_PCIE_GEN
+
+
CUPTI_METRIC_PROPERTY_DEVICE_CLASS
+
+
CUPTI_METRIC_PROPERTY_FLOP_SP_PER_CYCLE
+
+
CUPTI_METRIC_PROPERTY_FLOP_DP_PER_CYCLE
+
+
CUPTI_METRIC_PROPERTY_L2_UNITS
+
+
CUPTI_METRIC_PROPERTY_ECC_ENABLED
+
+
CUPTI_METRIC_PROPERTY_FLOP_HP_PER_CYCLE
+
+
CUPTI_METRIC_PROPERTY_GPU_CPU_NVLINK_BANDWIDTH
+
+
+
+
+
+ enum CUpti_MetricValueKind
+
+
+

Metric values can be one of several different kinds. Corresponding to each kind is a member of the CUpti_MetricValue union. The metric value returned by cuptiMetricGetValue should be accessed using the appropriate member of that union based on its value kind. +

+
+
+
+ Values + +
+
+
CUPTI_METRIC_VALUE_KIND_DOUBLE = 0
+
The metric value is a 64-bit double.
+
CUPTI_METRIC_VALUE_KIND_UINT64 = 1
+
The metric value is a 64-bit unsigned integer.
+
CUPTI_METRIC_VALUE_KIND_PERCENT = 2
+
The metric value is a percentage represented by a 64-bit double. For example, 57.5% is represented by the value 57.5.
+
CUPTI_METRIC_VALUE_KIND_THROUGHPUT = 3
+
The metric value is a throughput represented by a 64-bit integer. The unit for throughput values is bytes/second.
+
CUPTI_METRIC_VALUE_KIND_INT64 = 4
+
The metric value is a 64-bit signed integer.
+
CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL = 5
+
The metric value is a utilization level, as represented by CUpti_MetricValueUtilizationLevel.
+
CUPTI_METRIC_VALUE_KIND_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_MetricValueUtilizationLevel
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_METRIC_VALUE_UTILIZATION_IDLE = 0
+
+
CUPTI_METRIC_VALUE_UTILIZATION_LOW = 2
+
+
CUPTI_METRIC_VALUE_UTILIZATION_MID = 5
+
+
CUPTI_METRIC_VALUE_UTILIZATION_HIGH = 8
+
+
CUPTI_METRIC_VALUE_UTILIZATION_MAX = 10
+
+
CUPTI_METRIC_VALUE_UTILIZATION_FORCE_INT = 0x7fffffff
+
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiDeviceEnumMetrics ( CUdevice device, size_t* arraySizeBytes, CUpti_MetricID* metricArray )
+
+
Get the metrics for a device.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
arraySizeBytes
+
The size of metricArray in bytes, and returns the number of bytes written to metricArray
+
metricArray
+
Returns the IDs of the metrics for the device
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if arraySizeBytes or metricArray are NULL +

    +
  • +
+

+
+
+
Description
+

Returns the metric IDs in metricArray for a device. The size of the metricArray buffer is given by *arraySizeBytes. The size of the metricArray buffer must be at least numMetrics * sizeof(CUpti_MetricID) or else not all metric IDs will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in metricArray. +

+

+
+
+
+ CUptiResult cuptiDeviceGetNumMetrics ( CUdevice device, uint32_t* numMetrics )
+
+
Get the number of metrics for a device.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
numMetrics
+
Returns the number of metrics available for the device
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numMetrics is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the number of metrics available for a device.

+

+
+
+
+ CUptiResult cuptiEnumMetrics ( size_t* arraySizeBytes, CUpti_MetricID* metricArray )
+
+
Get all the metrics available on any device.
+
+
+ Parameters + +
+
+
arraySizeBytes
+
The size of metricArray in bytes, and returns the number of bytes written to metricArray
+
metricArray
+
Returns the IDs of the metrics
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if arraySizeBytes or metricArray are NULL +

    +
  • +
+

+
+
+
Description
+

Returns the metric IDs in metricArray for all CUDA-capable devices. The size of the metricArray buffer is given by *arraySizeBytes. The size of the metricArray buffer must be at least numMetrics * sizeof(CUpti_MetricID) or else not all metric IDs will be returned. The value returned in *arraySizeBytes contains the number of bytes returned in metricArray. +

+

+
+
+
+ CUptiResult cuptiGetNumMetrics ( uint32_t* numMetrics )
+
+
Get the total number of metrics available on any device.
+
+
+ Parameters + +
+
+
numMetrics
+
Returns the number of metrics
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numMetrics is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the total number of metrics available on any CUDA-capable device.

+

+
+
+
+ CUptiResult cuptiMetricCreateEventGroupSets ( CUcontext context, size_t metricIdArraySizeBytes, CUpti_MetricID* metricIdArray, CUpti_EventGroupSets** eventGroupPasses )
+
+
For a set of metrics, get the grouping that indicates the number of passes and the event groups necessary to collect the events + required for those metrics. +
+
+
+ Parameters + +
+
+
context
+
The context for event collection
+
metricIdArraySizeBytes
+
Size of the metricIdArray in bytes
+
metricIdArray
+
Array of metric IDs
+
eventGroupPasses
+
Returns a CUpti_EventGroupSets object that indicates the number of passes required to collect the events and the events to collect on each pass +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if metricIdArray or eventGroupPasses is NULL +

    +
  • +
+

+
+
+
Description
+

For a set of metrics, get the grouping that indicates the number of passes and the event groups necessary to collect the events + required for those metrics. +

+

See also:

+

cuptiEventGroupSetsCreate for details on event group set creation. +

+

+
+
+
+ CUptiResult cuptiMetricEnumEvents ( CUpti_MetricID metric, size_t* eventIdArraySizeBytes, CUpti_EventID* eventIdArray )
+
+
Get the events required to calculate a metric.
+
+
+ Parameters + +
+
+
metric
+
ID of the metric
+
eventIdArraySizeBytes
+
The size of eventIdArray in bytes, and returns the number of bytes written to eventIdArray
+
eventIdArray
+
Returns the IDs of the events required to calculate metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if eventIdArraySizeBytes or eventIdArray are NULL. +

    +
  • +
+

+
+
+
Description
+

Gets the event IDs in eventIdArray required to calculate a metric. The size of the eventIdArray buffer is given by *eventIdArraySizeBytes and must be at least numEvents * sizeof(CUpti_EventID) or else not all events will be returned. The value returned in *eventIdArraySizeBytes contains the number of bytes returned in eventIdArray. +

+

+
+
+
+ CUptiResult cuptiMetricEnumProperties ( CUpti_MetricID metric, size_t* propIdArraySizeBytes, CUpti_MetricPropertyID* propIdArray )
+
+
Get the properties required to calculate a metric.
+
+
+ Parameters + +
+
+
metric
+
ID of the metric
+
propIdArraySizeBytes
+
The size of propIdArray in bytes, and returns the number of bytes written to propIdArray
+
propIdArray
+
Returns the IDs of the properties required to calculate metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if propIdArraySizeBytes or propIdArray are NULL. +

    +
  • +
+

+
+
+
Description
+

Gets the property IDs in propIdArray required to calculate a metric. The size of the propIdArray buffer is given by *propIdArraySizeBytes and must be at least numProp * sizeof(CUpti_MetricPropertyID) or else not all properties will be returned. The value returned in *propIdArraySizeBytes contains the number of bytes returned in propIdArray. +

+

+
+
+
+ CUptiResult cuptiMetricGetAttribute ( CUpti_MetricID metric, CUpti_MetricAttribute attrib, size_t* valueSize, void* value )
+
+
Get a metric attribute.
+
+
+ Parameters + +
+
+
metric
+
ID of the metric
+
attrib
+
The metric attribute to read
+
valueSize
+
The size of the value buffer in bytes, and returns the number of bytes written to value
+
value
+
Returns the attribute's value
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if valueSize or value is NULL, or if attrib is not a metric attribute +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    For non-c-string attribute values, indicates that the value buffer is too small to hold the attribute value. +

    +
  • +
+

+
+
+
Description
+

Returns a metric attribute in *value. The size of the value buffer is given by *valueSize. The value returned in *valueSize contains the number of bytes returned in value. +

+

If the attribute value is a c-string that is longer than *valueSize, then only the first *valueSize characters will be returned and there will be no terminating null byte. +

+

+
+
+
+ CUptiResult cuptiMetricGetIdFromName ( CUdevice device, const char* metricName, CUpti_MetricID* metric )
+
+
Find a metric by name.
+
+
+ Parameters + +
+
+
device
+
The CUDA device
+
metricName
+
The name of metric to find
+
metric
+
Returns the ID of the found metric or undefined if unable to find the metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_NAME +

    if unable to find a metric with name metricName. In this case *metric is undefined +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if metricName or metric are NULL. +

    +
  • +
+

+
+
+
Description
+

Find a metric by name and return the metric ID in *metric. +

+

+
+
+
+ CUptiResult cuptiMetricGetNumEvents ( CUpti_MetricID metric, uint32_t* numEvents )
+
+
Get number of events required to calculate a metric.
+
+
+ Parameters + +
+
+
metric
+
ID of the metric
+
numEvents
+
Returns the number of events required for the metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numEvents is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the number of events in numEvents that are required to calculate a metric. +

+

+
+
+
+ CUptiResult cuptiMetricGetNumProperties ( CUpti_MetricID metric, uint32_t* numProp )
+
+
Get number of properties required to calculate a metric.
+
+
+ Parameters + +
+
+
metric
+
ID of the metric
+
numProp
+
Returns the number of properties required for the metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if numProp is NULL +

    +
  • +
+

+
+
+
Description
+

Returns the number of properties in numProp that are required to calculate a metric. +

+

+
+
+
+ CUptiResult cuptiMetricGetRequiredEventGroupSets ( CUcontext context, CUpti_MetricID metric, CUpti_EventGroupSets** eventGroupSets )
+
+
For a metric get the groups of events that must be collected in the same pass.
+
+
+ Parameters + +
+
+
context
+
The context for event collection
+
metric
+
The metric ID
+
eventGroupSets
+
Returns a CUpti_EventGroupSets object that indicates the events that must be collected in the same pass to ensure the metric is calculated correctly. Returns + NULL if no grouping is required for metric +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
+

+
+
+
Description
+

For a metric get the groups of events that must be collected in the same pass to ensure that the metric is calculated correctly. + If the events are not collected as specified then the metric value may be inaccurate. +

+

The function returns NULL if a metric does not have any required event group. In this case the events needed for the metric + can be grouped in any manner for collection. +

+

+
+
+
+ CUptiResult cuptiMetricGetValue ( CUdevice device, CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID* eventIdArray, size_t eventValueArraySizeBytes, uint64_t* eventValueArray, uint64_t timeDuration, CUpti_MetricValue* metricValue )
+
+
Calculate the value for a metric.
+
+
+ Parameters + +
+
+
device
+
The CUDA device that the metric is being calculated for
+
metric
+
The metric ID
+
eventIdArraySizeBytes
+
The size of eventIdArray in bytes +
+
eventIdArray
+
The event IDs required to calculate metric
+
eventValueArraySizeBytes
+
The size of eventValueArray in bytes +
+
eventValueArray
+
The normalized event values required to calculate metric. The values must be ordered to match the order of events in eventIdArray
+
timeDuration
+
The duration over which the events were collected, in ns
+
metricValue
+
Returns the value for the metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    if the eventIdArray does not contain all the events needed for metric

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_VALUE +

    if any of the event values required for the metric is CUPTI_EVENT_OVERFLOW

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_VALUE +

    if the computed metric value cannot be represented in the metric's value type. For example, if the metric value type is unsigned + and the computed metric value is negative +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if metricValue, eventIdArray or eventValueArray is NULL +

    +
  • +
+

+
+
+
Description
+

Use the events collected for a metric to calculate the metric value. Metric value evaluation depends on the evaluation mode + CUpti_MetricEvaluationMode that the metric supports. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that + the input event value is for one domain instance. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, + it assumes that input event values are normalized to represent all domain instances on a device. For the most accurate metric + collection, the events required for the metric should be collected for all profiled domain instances. For example, to collect + all instances of an event, set the CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on the group containing the + event to 1. The normalized value for the event is then: (sum_event_values * totalInstanceCount) / instanceCount, where sum_event_values is the summation of the event values across all profiled domain instances, totalInstanceCount is obtained from querying CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and instanceCount is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT). +

+

+
+
+
+ CUptiResult cuptiMetricGetValue2 ( CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID* eventIdArray, size_t eventValueArraySizeBytes, uint64_t* eventValueArray, size_t propIdArraySizeBytes, CUpti_MetricPropertyID* propIdArray, size_t propValueArraySizeBytes, uint64_t* propValueArray, CUpti_MetricValue* metricValue )
+
+
Calculate the value for a metric.
+
+
+ Parameters + +
+
+
metric
+
The metric ID
+
eventIdArraySizeBytes
+
The size of eventIdArray in bytes +
+
eventIdArray
+
The event IDs required to calculate metric
+
eventValueArraySizeBytes
+
The size of eventValueArray in bytes +
+
eventValueArray
+
The normalized event values required to calculate metric. The values must be ordered to match the order of events in eventIdArray
+
propIdArraySizeBytes
+
The size of propIdArray in bytes +
+
propIdArray
+
The metric property IDs required to calculate metric
+
propValueArraySizeBytes
+
The size of propValueArray in bytes +
+
propValueArray
+
The metric property values required to calculate metric. The values must be ordered to match the order of metric properties in propIdArray
+
metricValue
+
Returns the value for the metric
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    +
  • +
  • CUPTI_ERROR_INVALID_METRIC_ID +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    if the eventIdArray does not contain all the events needed for metric

    +
  • +
  • CUPTI_ERROR_INVALID_EVENT_VALUE +

    if any of the event values required for the metric is CUPTI_EVENT_OVERFLOW

    +
  • +
  • CUPTI_ERROR_NOT_COMPATIBLE +

    if the computed metric value cannot be represented in the metric's value type. For example, if the metric value type is unsigned + and the computed metric value is negative +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if metricValue, eventIdArray or eventValueArray is NULL +

    +
  • +
+

+
+
+
Description
+

Use the events and properties collected for a metric to calculate the metric value. Metric value evaluation depends on the + evaluation mode CUpti_MetricEvaluationMode that the metric supports. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that + the input event value is for one domain instance. If a metric has evaluation mode as CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, + it assumes that input event values are normalized to represent all domain instances on a device. For the most accurate metric + collection, the events required for the metric should be collected for all profiled domain instances. For example, to collect + all instances of an event, set the CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on the group containing the + event to 1. The normalized value for the event is then: (sum_event_values * totalInstanceCount) / instanceCount, where sum_event_values is the summation of the event values across all profiled domain instances, totalInstanceCount is obtained from querying CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and instanceCount is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT). +

+

+
+
+
+
+
+
+

5.7. CUPTI Profiling API

+
+

Functions, types, and enums that implement the CUPTI Profiling API.

+
+

Classes

+
+
struct  +
+
Params for cuptiProfilerBeginPass.
+
struct  +
+
Params for cuptiProfilerBeginSession.
+
struct  +
+
Input parameter to define the counterDataImage.
+
struct  +
+
Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize.
+
struct  +
+
Params for cuptiProfilerCounterDataImageCalculateSize.
+
struct  +
+
Params for cuptiProfilerCounterDataImageInitializeScratchBuffer.
+
struct  +
+
Params for cuptiProfilerCounterDataImageInitialize.
+
struct  +
+
Default parameter for cuptiProfilerDeInitialize.
+
struct  +
+
Params for cuptiProfilerDeviceSupported.
+
struct  +
+
Params for cuptiProfilerDisableProfiling.
+
struct  +
+
Params for cuptiProfilerEnableProfiling.
+
struct  +
+
Params for cuptiProfilerEndPass.
+
struct  +
+
Params for cuptiProfilerEndSession.
+
struct  +
+
Params for cuptiProfilerFlushCounterData.
+
struct  +
+
Params for cuptiProfilerGetCounterAvailability.
+
struct  +
+
Default parameter for cuptiProfilerInitialize.
+
struct  +
+
Params for cuptiProfilerIsPassCollected.
+
struct  +
+
Params for cuptiProfilerSetConfig.
+
struct  +
+
Params for cuptiProfilerUnsetConfig.
+
+

Enumerations

+
+
enum CUpti_ProfilerRange
+
Profiler range attribute.
+
enum CUpti_ProfilerReplayMode
+
Profiler replay attribute.
+
enum CUpti_Profiler_Support_Level
+
Generic support level enum for CUPTI.
+
+

Functions

+
+
CUptiResult cuptiProfilerBeginPass ( CUpti_Profiler_BeginPass_Params* pParams )
+
Replay API: used for multipass collection.
+
CUptiResult cuptiProfilerBeginSession ( CUpti_Profiler_BeginSession_Params* pParams )
+
Begin profiling session sets up the profiling on the device.
+
CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize ( CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* pParams )
+
A temporary storage for CounterData image needed for internal operations.
+
CUptiResult cuptiProfilerCounterDataImageCalculateSize ( CUpti_Profiler_CounterDataImage_CalculateSize_Params* pParams )
+
A CounterData image allocates space for values for each counter for each range.
+
CUptiResult cuptiProfilerDeInitialize ( CUpti_Profiler_DeInitialize_Params* pParams )
+
DeInitializes the profiler interface.
+
CUptiResult cuptiProfilerDeviceSupported ( CUpti_Profiler_DeviceSupported_Params* pParams )
+
Query device compatibility with Profiling API.
+
CUptiResult cuptiProfilerDisableProfiling ( CUpti_Profiler_DisableProfiling_Params* pParams )
+
Disable Profiling.
+
CUptiResult cuptiProfilerEnableProfiling ( CUpti_Profiler_EnableProfiling_Params* pParams )
+
Enables Profiling.
+
CUptiResult cuptiProfilerEndPass ( CUpti_Profiler_EndPass_Params* pParams )
+
Replay API: used for multipass collection.
+
CUptiResult cuptiProfilerEndSession ( CUpti_Profiler_EndSession_Params* pParams )
+
Ends profiling session.
+
CUptiResult cuptiProfilerFlushCounterData ( CUpti_Profiler_FlushCounterData_Params* pParams )
+
Decode all the submitted passes.
+
CUptiResult cuptiProfilerGetCounterAvailability ( CUpti_Profiler_GetCounterAvailability_Params* pParams )
+
Query counter availability.
+
CUptiResult cuptiProfilerInitialize ( CUpti_Profiler_Initialize_Params* pParams )
+
Initializes the profiler interface.
+
CUptiResult cuptiProfilerIsPassCollected ( CUpti_Profiler_IsPassCollected_Params* pParams )
+
Asynchronous call to query if the submitted pass to GPU is collected.
+
CUptiResult cuptiProfilerPopRange ( CUpti_Profiler_PopRange_Params* pParams )
+
Range API's : Pop user range.
+
CUptiResult cuptiProfilerPushRange ( CUpti_Profiler_PushRange_Params* pParams )
+
Range API's : Push user range.
+
CUptiResult cuptiProfilerSetConfig ( CUpti_Profiler_SetConfig_Params* pParams )
+
Set metrics configuration to be profiled.
+
CUptiResult cuptiProfilerUnsetConfig ( CUpti_Profiler_UnsetConfig_Params* pParams )
+
Unset metrics configuration profiled.
+
+
+

Enumerations

+
+
+ enum CUpti_ProfilerRange
+
+
+

A metric enabled in the session's configuration is collected separately per unique range-stack in the pass. This is an attribute + to collect metrics around each kernel in a profiling session or in a user-defined range. +

+
+
+
+ Values + +
+
+
CUPTI_Range_INVALID
+
Invalid value
+
CUPTI_AutoRange
+
Ranges are auto defined around each kernel in a profiling session
+
CUPTI_UserRange
+
A range in which metric data to be collected is defined by the user
+
CUPTI_Range_COUNT
+
Range count
+
+
+
+
+ enum CUpti_ProfilerReplayMode
+
+
+

For metrics which require multipass collection, a replay of the GPU kernel(s) is required. This is an attribute which specifies + how the replay of the kernel(s) to be measured is done. +

+
+
+
+ Values + +
+
+
CUPTI_Replay_INVALID
+
Invalid Value
+
CUPTI_ApplicationReplay
+
Replay is done by CUPTI user around the process
+
CUPTI_KernelReplay
+
Replay is done around kernel implicitly by CUPTI
+
CUPTI_UserReplay
+
Replay is done by CUPTI user within a process
+
CUPTI_Replay_COUNT
+
Replay count
+
+
+
+
+ enum CUpti_Profiler_Support_Level
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_PROFILER_CONFIGURATION_UNKNOWN = 0
+
Configuration support level unknown - either detection code errored out before setting this value, or unable to determine + it. +
+
CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED
+
Profiling is unavailable. For specific feature fields, this means that the current configuration of this feature does not + work with profiling. For instance, SLI-enabled devices do not support profiling, and this value would be returned for SLI + on an SLI-enabled device. +
+
CUPTI_PROFILER_CONFIGURATION_DISABLED
+
Profiling would be available for this configuration, but was disabled by the system.
+
CUPTI_PROFILER_CONFIGURATION_SUPPORTED
+
Profiling is supported. For specific feature fields, this means that the current configuration of this feature works with + profiling. For instance, SLI-enabled devices do not support profiling, and this value would only be returned for devices which + are not SLI-enabled. +
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiProfilerBeginPass ( CUpti_Profiler_BeginPass_Params* pParams )
+
+
Replay API: used for multipass collection.
+
+
Description
+

These APIs are used if user chooses to replay by itself CUPTI_UserReplay or CUPTI_ApplicationReplay for multipass collection of the metrics configurations. It's a no-op in case of CUPTI_KernelReplay. +

+
+
+
+ CUptiResult cuptiProfilerBeginSession ( CUpti_Profiler_BeginSession_Params* pParams )
+
+
Begin profiling session sets up the profiling on the device.
+
+
Description
+

Although it doesn't start the profiling, the GPU resources needed for profiling are allocated. Outside of a session, the GPU + will return to its normal operating state. +

+
+
+
+ CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize ( CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* pParams )
+
+
A temporary storage for CounterData image needed for internal operations.
+
+
Description
+

Use these APIs to calculate the allocation size and initialize counterData image scratch buffer.

+
+
+
+ CUptiResult cuptiProfilerCounterDataImageCalculateSize ( CUpti_Profiler_CounterDataImage_CalculateSize_Params* pParams )
+
+
A CounterData image allocates space for values for each counter for each range.
+
+
Description
+

The user bears the responsibility of managing the counterDataImage allocations. CounterDataPrefix contains meta data about the + metrics that will be stored in counterDataImage. Use these APIs to calculate the allocation size and initialize counterData + image. +

+
+
+
+ CUptiResult cuptiProfilerDeInitialize ( CUpti_Profiler_DeInitialize_Params* pParams )
+
+
DeInitializes the profiler interface.
+
+
Description
+

+
+
+
+ CUptiResult cuptiProfilerDeviceSupported ( CUpti_Profiler_DeviceSupported_Params* pParams )
+
+
Query device compatibility with Profiling API.
+
+
Description
+

Use this call to determine whether a compute device and configuration are compatible with the Profiling API. If the configuration + does not support profiling, one of several flags will indicate why. +

+
+
+
+ CUptiResult cuptiProfilerDisableProfiling ( CUpti_Profiler_DisableProfiling_Params* pParams )
+
+
Disable Profiling.
+
+
Description
+

In CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in a profiling session. +

+
+
+
+ CUptiResult cuptiProfilerEnableProfiling ( CUpti_Profiler_EnableProfiling_Params* pParams )
+
+
Enables Profiling.
+
+
Description
+

In CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in a profiling session. +

+
+
+
+ CUptiResult cuptiProfilerEndPass ( CUpti_Profiler_EndPass_Params* pParams )
+
+
Replay API: used for multipass collection.
+
+
Description
+

These APIs are used if user chooses to replay by itself CUPTI_UserReplay or CUPTI_ApplicationReplay for multipass collection of the metrics configurations. It's a no-op in case of CUPTI_KernelReplay. Returns information for next pass. +

+
+
+
+ CUptiResult cuptiProfilerEndSession ( CUpti_Profiler_EndSession_Params* pParams )
+
+
Ends profiling session.
+
+
Description
+

Frees up the GPU resources acquired for profiling. Outside of a session, the GPU will return to its normal operating state. + +

+
+
+
+ CUptiResult cuptiProfilerFlushCounterData ( CUpti_Profiler_FlushCounterData_Params* pParams )
+
+
Decode all the submitted passes.
+
+
Description
+

Flush Counter data API to ensure every pass is decoded into the counterDataImage passed at beginSession. This will cause the + CPU/GPU sync to collect all the undecoded passes. +

+
+
+
+ CUptiResult cuptiProfilerGetCounterAvailability ( CUpti_Profiler_GetCounterAvailability_Params* pParams )
+
+
Query counter availability.
+
+
Description
+

Use this API to query counter availability information in a buffer which can be used to filter unavailable raw metrics on + host. Note: This API may fail, if any profiling or sampling session is active on the specified context or its device. +

+
+
+
+ CUptiResult cuptiProfilerInitialize ( CUpti_Profiler_Initialize_Params* pParams )
+
+
Initializes the profiler interface.
+
+
Description
+

Loads the required libraries in the process address space. Sets up the hooks with the CUDA driver.

+
+
+
+ CUptiResult cuptiProfilerIsPassCollected ( CUpti_Profiler_IsPassCollected_Params* pParams )
+
+
Asynchronous call to query if the submitted pass to GPU is collected.
+
+
Description
+

+
+
+
+ CUptiResult cuptiProfilerPopRange ( CUpti_Profiler_PopRange_Params* pParams )
+
+
Range API's : Pop user range.
+
+
Description
+

Counter data is collected per unique range-stack, identified by a string label passed by the user. It's an invalid operation + in case of CUPTI_AutoRange. +

+
+
+
+ CUptiResult cuptiProfilerPushRange ( CUpti_Profiler_PushRange_Params* pParams )
+
+
Range API's : Push user range.
+
+
Description
+

Counter data is collected per unique range-stack, identified by a string label passed by the user. It's an invalid operation + in case of CUPTI_AutoRange. +

+
+
+
+ CUptiResult cuptiProfilerSetConfig ( CUpti_Profiler_SetConfig_Params* pParams )
+
+
Set metrics configuration to be profiled.
+
+
Description
+

Use these APIs to set the config to profile in a session. It can be used for advanced cases such as where multiple configurations + are collected into a single CounterData Image on an as-needed basis, without restarting the session. +

+
+
+
+ CUptiResult cuptiProfilerUnsetConfig ( CUpti_Profiler_UnsetConfig_Params* pParams )
+
+
Unset metrics configuration profiled.
+
+
Description
+

+
+
+
+
+
+
+

5.8. CUPTI Checkpoint API

+
+

Functions, types, and enums that implement the CUPTI Checkpoint API.

+
+

Classes

+
+
struct  +
+
Configuration and handle for a CUPTI Checkpoint.
+
+

Enumerations

+
+
enum NV::Cupti::Checkpoint::CUpti_CheckpointOptimizations
+
Specifies optimization options for a checkpoint, may be OR'd together to specify multiple options.
+
+

Functions

+
+
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointFree ( const CUpti_Checkpoint* handle )
+
Free the backing data for a checkpoint.
+
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointRestore ( const CUpti_Checkpoint* handle )
+
Restore a checkpoint to the device associated with its context.
+
CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointSave ( const CUpti_Checkpoint* handle )
+
Initialize and save a checkpoint of the device state associated with the handle context.
+
+
+

Enumerations

+
+
+ enum NV::Cupti::Checkpoint::CUpti_CheckpointOptimizations
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_CHECKPOINT_OPT_NONE = 0
+
Default behavior.
+
CUPTI_CHECKPOINT_OPT_TRANSFER = 1
+
Determine which mem blocks have changed, and only restore those. This optimization is cached, which means cuptiCheckpointRestore + must always be called at the same point in the application when this option is enabled, or the result may be incorrect. +
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointFree ( const CUpti_Checkpoint* handle )
+
+
Free the backing data for a checkpoint.
+
+
+ Parameters + +
+
+
handle
+
A pointer to a previously saved CUpti_Checkpoint object +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    if the handle was successfully freed

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if the handle was already freed or appears invalid

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    if the context is no longer valid

    +
  • +
+

+
+
+
Description
+

Frees all associated device, host memory and filesystem storage used for this context. After freeing a handle, it may be re-used + as if it was new - options may be re-configured and will take effect on the next call to cuptiCheckpointSave. +

+

+
+
+
+ CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointRestore ( const CUpti_Checkpoint* handle )
+
+
Restore a checkpoint to the device associated with its context.
+
+
+ Parameters + +
+
+
handle
+
A pointer to a previously saved CUpti_Checkpoint object +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    if the checkpoint was successfully restored

    +
  • +
  • CUPTI_ERROR_NOT_INITIALIZED +

    if the checkpoint was not previously initialized

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if the handle appears invalid

    +
  • +
  • CUPTI_ERROR_UNKNOWN +

    if the restore or optimization operation fails

    +
  • +
+

+
+
+
Description
+

Restores device, pinned, and allocated memory to the state when the checkpoint was saved

+

+
+
+
+ CUptiResult NV::Cupti::Checkpoint::cuptiCheckpointSave ( const CUpti_Checkpoint* handle )
+
+
Initialize and save a checkpoint of the device state associated with the handle context.
+
+
+ Parameters + +
+
+
handle
+
A pointer to a CUpti_Checkpoint object +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    if a checkpoint was successfully initialized and saved

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if handle does not appear to refer to a valid CUpti_Checkpoint

    +
  • +
  • CUPTI_ERROR_INVALID_CONTEXT +

    +
  • +
  • CUPTI_ERROR_INVALID_DEVICE +

    if device associated with context is not compatible with checkpoint API

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if Save is requested over an existing checkpoint, but allowOverwrite was not originally specified +

    +
  • +
  • CUPTI_ERROR_OUT_OF_MEMORY +

    if as configured, not enough backing storage space to save the checkpoint

    +
  • +
+

+
+
+
Description
+

Uses the handle options to configure and save a checkpoint of the device state associated with the specified context.

+

+
+
+
+
+
+
+

5.9. CUPTI PC Sampling API

+
+

Functions, types, and enums that implement the CUPTI PC Sampling API.

+
+

Classes

+
+
struct  +
+
Params for cuptiGetCubinCrc.
+
struct  +
+
Params for cuptiGetSassToSourceCorrelation.
+
struct  +
+
PC sampling configuration information structure.
+
struct  +
+
PC sampling configuration structure.
+
struct  +
+
Collected PC Sampling data.
+
struct  +
+
Params for cuptiPCSamplingDisable.
+
struct  +
+
Params for cuptiPCSamplingEnable.
+
struct  +
+
Params for cuptiPCSamplingEnable.
+
struct  +
+
Params for cuptiPCSamplingGetNumStallReasons.
+
struct  +
+
Params for cuptiPCSamplingGetStallReasons.
+
struct  +
+
PC Sampling data.
+
struct  +
+
PC Sampling stall reasons.
+
struct  +
+
Params for cuptiPCSamplingStart.
+
struct  +
+
Params for cuptiPCSamplingStop.
+
+

Typedefs

+
+
typedef + void +  ( *CUpti_ComputeCrcCallbackFunc )( const void* +  cubin,  size_t cubinSize, uint64_t* +  cubinCrc )
+
Function type for callback used by CUPTI to request crc of loaded module.
+
+

Enumerations

+
+
enum CUpti_PCSamplingCollectionMode
+
PC Sampling collection mode.
+
enum CUpti_PCSamplingConfigurationAttributeType
+
PC Sampling configuration attributes.
+
enum CUpti_PCSamplingOutputDataFormat
+
PC Sampling output data format.
+
+

Functions

+
+
CUptiResult cuptiGetCubinCrc ( CUpti_GetCubinCrcParams* pParams )
+
Get the CRC of cubin.
+
CUptiResult cuptiGetSassToSourceCorrelation ( CUpti_GetSassToSourceCorrelationParams* pParams )
+
SASS to Source correlation.
+
CUptiResult cuptiPCSamplingDisable ( CUpti_PCSamplingDisableParams* pParams )
+
Disable PC sampling.
+
CUptiResult cuptiPCSamplingEnable ( CUpti_PCSamplingEnableParams* pParams )
+
Enable PC sampling.
+
CUptiResult cuptiPCSamplingGetConfigurationAttribute ( CUpti_PCSamplingConfigurationInfoParams* pParams )
+
Read PC Sampling configuration attribute.
+
CUptiResult cuptiPCSamplingGetData ( CUpti_PCSamplingGetDataParams* pParams )
+
Flush GPU PC sampling data periodically.
+
CUptiResult cuptiPCSamplingGetNumStallReasons ( CUpti_PCSamplingGetNumStallReasonsParams* pParams )
+
Get PC sampling stall reason count.
+
CUptiResult cuptiPCSamplingGetStallReasons ( CUpti_PCSamplingGetStallReasonsParams* pParams )
+
Get PC sampling stall reasons.
+
CUptiResult cuptiPCSamplingSetConfigurationAttribute ( CUpti_PCSamplingConfigurationInfoParams* pParams )
+
Write PC Sampling configuration attribute.
+
CUptiResult cuptiPCSamplingStart ( CUpti_PCSamplingStartParams* pParams )
+
Start PC sampling.
+
CUptiResult cuptiPCSamplingStop ( CUpti_PCSamplingStopParams* pParams )
+
Stop PC sampling.
+
CUptiResult cuptiRegisterComputeCrcCallback ( CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc )
+
Register callback function with CUPTI to use your own algorithm to compute cubin crc.
+
+
+

Typedefs

+
+
+ + void + ( *CUpti_ComputeCrcCallbackFunc )( const void* +  cubin,  size_t cubinSize, uint64_t* +  cubinCrc )
+
+
+

Function type for callback used by CUPTI to request crc of loaded module. This callback function asks for the crc of the provided + module in function. The provided crc will be stored in PC sampling records i.e. in the field 'cubinCrc' of the PC sampling + struct CUpti_PCSamplingPCData. The CRC is used during the offline source correlation to uniquely identify the module. +

+

+
+
+
+
+ Parameters + +
+
+
cubin
+
The pointer to cubin binary
+
size_t cubinSize
+
+
cubinCrc
+
Returns the computed crc of cubin.
+
+
+
+
+
+

Enumerations

+
+
+ enum CUpti_PCSamplingCollectionMode
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID = 0
+
INVALID Value
+
CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS = 1
+
Continuous mode. Kernels are not serialized in this mode.
+
CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED = 2
+
Serialized mode. Kernels are serialized in this mode.
+
+
+
+
+ enum CUpti_PCSamplingConfigurationAttributeType
+
+
+

PC Sampling configuration attribute types. These attributes can be read using cuptiPCSamplingGetConfigurationAttribute and can be written using cuptiPCSamplingSetConfigurationAttribute. Attributes marked [r] can only be read using cuptiPCSamplingGetConfigurationAttribute; [w] can only be written using cuptiPCSamplingSetConfigurationAttribute; [rw] can be read using cuptiPCSamplingGetConfigurationAttribute and written using cuptiPCSamplingSetConfigurationAttribute.

+
+
+
+ Values + +
+
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_INVALID = 0
+
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD = 1
+
[rw] Sampling period for PC Sampling. DEFAULT - CUPTI defined value based on number of SMs. Valid values for the sampling periods + are between 5 and 31, both inclusive. This will set the sampling period to (2^samplingPeriod) cycles. E.g. for sampling + period = 5 to 31, cycles = 32, 64, 128,..., 2^31. Value is a uint32_t +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON = 2
+
[w] Number of stall reasons to collect. DEFAULT - All stall reasons will be collected Value is a size_t [w] Stall reasons + to collect DEFAULT - All stall reasons will be collected Input value should be a pointer pointing to array of stall reason + indexes containing all the stall reason indexes to collect. +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE = 3
+
[rw] Size of SW buffer for raw PC counter data downloaded from HW buffer DEFAULT - 1 MB, which can accommodate approximately + 5500 PCs with all stall reasons Approximately it takes 16 Bytes (and some fixed size memory) to accommodate one PC with one + stall reason For e.g. 1 PC with 1 stall reason = 32 Bytes 1 PC with 2 stall reason = 48 Bytes 1 PC with 4 stall reason = 96 + Bytes Value is a size_t +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE = 4
+
[rw] Size of HW buffer in bytes. DEFAULT - 512 MB. If sampling period is too low, HW buffer can overflow and drop PC data. Value + is a size_t +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE = 5
+
[rw] PC Sampling collection mode DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS Input value should be of type CUpti_PCSamplingCollectionMode. +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL = 6
+
[rw] Control over PC Sampling data collection range Default - 0 1 - Allows user to start and stop PC Sampling using APIs - + cuptiPCSamplingStart() - Start PC Sampling cuptiPCSamplingStop() - Stop PC Sampling Value is a uint32_t +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT = 7
+
[w] Value for output data format Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED Input value should be of type CUpti_PCSamplingOutputDataFormat. +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER = 8
+
[w] Data buffer to hold collected PC Sampling data PARSED_DATA Default - none. Buffer type is void * which can point to PARSED_DATA + Refer CUpti_PCSamplingData for buffer format for PARSED_DATA +
+
CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_FORCE_INT = 0x7fffffff
+
+
+
+
+
+ enum CUpti_PCSamplingOutputDataFormat
+
+
+

+
+
+
+ Values + +
+
+
CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_INVALID = 0
+
+
CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED = 1
+
HW buffer data will be parsed during collection of data
+
+
+
+
+
+
+

Functions

+
+
+ CUptiResult cuptiGetCubinCrc ( CUpti_GetCubinCrcParams* pParams )
+
+
Get the CRC of cubin.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if parameter cubin is NULL or provided cubinSize is zero or size field is not set.

    +
  • +
+

+
+
+
Description
+

This function returns the CRC of provided cubin binary.

+

+
+
+
+ CUptiResult cuptiGetSassToSourceCorrelation ( CUpti_GetSassToSourceCorrelationParams* pParams )
+
+
SASS to Source correlation.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if either of the parameters cubin or functionName is NULL or cubinSize is zero or size field is not set correctly.

    +
  • +
  • CUPTI_ERROR_INVALID_MODULE +

    provided cubin is invalid.

    +
  • +
  • CUPTI_ERROR_UNKNOWN +

    an internal error occurred. This error code is also used for cases when the function is not present in the module. A better + error code will be returned in the future release. +

    +
  • +
+

+
+
+
Description
+

+ The user is expected to free the memory allocated for fileName and dirName after use. +

+

+
+
+
+ CUptiResult cuptiPCSamplingDisable ( CUpti_PCSamplingDisableParams* pParams )
+
+
Disable PC sampling.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

For application which doesn't destroy the CUDA context explicitly, this API does the PC Sampling tear-down, joins threads + and copies PC records in the buffer provided during the PC sampling configuration. PC records which can't be accommodated + in the buffer are discarded. +

+

+
+
+
+ CUptiResult cuptiPCSamplingEnable ( CUpti_PCSamplingEnableParams* pParams )
+
+
Enable PC sampling.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

+
+
+
+ CUptiResult cuptiPCSamplingGetConfigurationAttribute ( CUpti_PCSamplingConfigurationInfoParams* pParams )
+
+
Read PC Sampling configuration attribute.
+
+
+ Parameters + +
+
+
pParams
+
A pointer to CUpti_PCSamplingConfigurationInfoParams containing PC sampling configuration. +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if this API is called with some invalid attribute.

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if attrib is not valid or any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT +

    indicates that the value buffer is too small to hold the attribute value +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

+
+
+
+ CUptiResult cuptiPCSamplingGetData ( CUpti_PCSamplingGetDataParams* pParams )
+
+
Flush GPU PC sampling data periodically.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if this API is called without enabling PC sampling.

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

Flushing of GPU PC Sampling data is required at the following points to maintain uniqueness of PCs: (1) For CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, + after every module load-unload-load. (2) For CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends. (3) If configuration + option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL is enabled, then after every range end i.e. cuptiPCSamplingStop(). If the application is profiled in CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, with CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL disabled, + and there is no module unload, the user can collect data in two ways: (a) Use the cuptiPCSamplingGetData() API periodically. (b) Use cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from the sampling data buffer passed during configuration. Note: In case the cuptiPCSamplingGetData() API is not called periodically, the sampling data buffer passed during configuration should be large enough to hold all + PCs data. The cuptiPCSamplingGetData() API never does device synchronization. It is possible that when the API is called there is some unconsumed data from the + HW buffer. In this case CUPTI provides only the data available with it at that moment. + + +

+
+
+
+ CUptiResult cuptiPCSamplingGetNumStallReasons ( CUpti_PCSamplingGetNumStallReasonsParams* pParams )
+
+
Get PC sampling stall reason count.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

+
+
+
+ CUptiResult cuptiPCSamplingGetStallReasons ( CUpti_PCSamplingGetStallReasonsParams* pParams )
+
+
Get PC sampling stall reasons.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

+
+
+
+ CUptiResult cuptiPCSamplingSetConfigurationAttribute ( CUpti_PCSamplingConfigurationInfoParams* pParams )
+
+
Write PC Sampling configuration attribute.
+
+
+ Parameters + +
+
+
pParams
+
A pointer to CUpti_PCSamplingConfigurationInfoParams containing PC sampling configuration. +
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if this API is called with some invalid attrib. +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if attribute value is not valid or any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

+
+
+
+ CUptiResult cuptiPCSamplingStart ( CUpti_PCSamplingStartParams* pParams )
+
+
Start PC sampling.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if this API is called with incorrect PC Sampling configuration.

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

User can collect PC Sampling data for user-defined range specified by Start/Stop APIs. This API can be used to mark starting + of range. Set configuration option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API. + + +

+
+
+
+ CUptiResult cuptiPCSamplingStop ( CUpti_PCSamplingStopParams* pParams )
+
+
Stop PC sampling.
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_OPERATION +

    if this API is called with incorrect PC Sampling configuration.

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if any pParams is not valid +

    +
  • +
  • CUPTI_ERROR_NOT_SUPPORTED +

    indicates that the system/device does not support the API

    +
  • +
+

+
+
+
Description
+

User can collect PC Sampling data for user-defined range specified by Start/Stop APIs. This API can be used to mark end of + range. Set configuration option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API. + + +

+
+
+
+ CUptiResult cuptiRegisterComputeCrcCallback ( CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc )
+
+
Register callback function with CUPTI to use your own algorithm to compute cubin crc.
+
+
+ Parameters + +
+
+
funcComputeCubinCrc
+
callback is invoked when a CUDA module is loaded.
+
+
+
+
Returns
+

+

    +
  • CUPTI_SUCCESS +

    +
  • +
  • CUPTI_ERROR_INVALID_PARAMETER +

    if funcComputeCubinCrc is NULL. +

    +
  • +
+

+
+
+
Description
+

This function registers a callback function and it gets called from CUPTI when a CUDA module is loaded.

+

+
+
+
+
+
+
+

5.10. CUPTI PC Sampling Utility API

+
+

Functions, types, and enums that implement the CUPTI PC Sampling Utility API.

+
+

Classes

+
+
struct  +
+
BufferInfo will be stored in the file for every buffer i.e for every call of UtilDumpPcSamplingBufferInFile() API.
+
struct  +
+
Params for CuptiUtilGetBufferInfo.
+
struct  +
+
Params for CuptiUtilGetHeaderData.
+
struct  +
+
Params for CuptiUtilGetPcSampData.
+
struct  +
+
Params for CuptiUtilMergePcSampData.
+
struct  +
+
Params for CuptiUtilPutPcSampData.
+
struct  +
+
Header info will be stored in file.
+
struct  +
+
All available stall reasons name and respective indexes will be stored in it.
+
+

Enumerations

+
+
enum CUPTI::PcSamplingUtil::CUptiUtilResult
+
CUPTI PC sampling utility API result codes.
+
+

Functions

+
+
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetBufferInfo ( CUptiUtil_GetBufferInfoParams* pParams )
+
Get buffer info data of file.
+
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetHeaderData ( CUptiUtil_GetHeaderDataParams* pParams )
+
Get header data of file.
+
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetPcSampData ( CUptiUtil_GetPcSampDataParams* pParams )
+
Retrieve PC sampling data from file into allocated buffer.
+
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilMergePcSampData ( CUptiUtil_MergePcSampDataParams* pParams )
+
Merge PC sampling data range id wise.
+
CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilPutPcSampData ( CUptiUtil_PutPcSampDataParams* pParams )
+
Dump PC sampling data into the file.
+
+
+

Enumerations

+
+
+ enum CUPTI::PcSamplingUtil::CUptiUtilResult
+
+
+

Error and result codes returned by CUPTI PC sampling utility API.

+
+
+
+ Values + +
+
+
CUPTI_UTIL_SUCCESS = 0
+
No error
+
CUPTI_UTIL_ERROR_INVALID_PARAMETER = 1
+
One or more of the parameters are invalid.
+
CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE = 2
+
Unable to create a new file
+
CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE = 3
+
Unable to open a file
+
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED = 4
+
Read or write operation failed
+
CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED = 5
+
Provided file handle is corrupted.
+
CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED = 6
+
seek operation failed.
+
CUPTI_UTIL_ERROR_OUT_OF_MEMORY = 7
+
Unable to allocate enough memory to perform the requested operation.
+
CUPTI_UTIL_ERROR_UNKNOWN = 999
+
An unknown internal error has occurred.
+
CUPTI_UTIL_ERROR_FORCE_INT = 0x7fffffff
+
+
+
+
+
+
+
+

Functions

+
+
+ CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetBufferInfo ( CUptiUtil_GetBufferInfoParams* pParams )
+
+
Get buffer info data of file.
+
+
Returns
+

+

    +
  • CUPTI_UTIL_SUCCESS +

    +
  • +
  • CUPTI_UTIL_ERROR_INVALID_PARAMETER +

    error out if either of pParam or fileHandle is NULL or param struct size is incorrect.

    +
  • +
  • CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED +

    file handle is not in good state to read data from file.

    +
  • +
  • CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED +

    failed to read data from file.

    +
  • +
+

+
+
+
Description
+

This API must be called every time before calling the CuptiUtilGetPcSampData API. The BufferInfo structure gives info about recordCount and stallReasonCount of every record in the buffer. This helps to allocate + a buffer of the exact size to retrieve the data into. +

+

+
+
+
+ CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetHeaderData ( CUptiUtil_GetHeaderDataParams* pParams )
+
+
Get header data of file.
+
+
Returns
+

+

    +
  • CUPTI_UTIL_SUCCESS +

    +
  • +
  • CUPTI_UTIL_ERROR_INVALID_PARAMETER +

    error out if either of pParam or fileHandle is NULL or param struct size is incorrect.

    +
  • +
  • CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED +

    file handle is not in good state to read data from file

    +
  • +
  • CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED +

    failed to read data from file.

    +
  • +
+

+
+
+
Description
+

This API must be called once initially while retrieving data from the file. The Header structure gives info about the total number of buffers present in the file. +

+

+
+
+
+ CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilGetPcSampData ( CUptiUtil_GetPcSampDataParams* pParams )
+
+
Retrieve PC sampling data from file into allocated buffer.
+
+
Returns
+

+

    +
  • CUPTI_UTIL_SUCCESS +

    +
  • +
  • CUPTI_UTIL_ERROR_INVALID_PARAMETER +

    error out if buffer type is invalid, if either of pSampData, pParams is NULL, or if filename is empty. If pPcSamplingStallReasons is not NULL then + error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL. + +

    +
  • +
  • CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED +

    +
  • +
  • CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED +

    file handle is not in good state to read data from file.

    +
  • +
+

+
+
+
Description
+

This API must be called after CuptiUtilGetBufferInfo API. It will retrieve data from file into allocated buffer.

+

+
+
+
+ CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilMergePcSampData ( CUptiUtil_MergePcSampDataParams* pParams )
+
+
Merge PC sampling data range id wise.
+
+
Returns
+

+

    +
  • CUPTI_UTIL_SUCCESS +

    +
  • +
  • CUPTI_UTIL_ERROR_INVALID_PARAMETER +

    error out if param struct size is invalid or count of buffers to merge is invalid i.e less than 1 or either of PcSampDataBuffer, + MergedPcSampDataBuffers, numMergedBuffer is NULL +

    +
  • +
  • CUPTI_UTIL_ERROR_OUT_OF_MEMORY +

    Unable to allocate memory for merged buffer.

    +
  • +
+

+
+
+
Description
+

This API merges PC sampling data range id wise. It allocates memory for the merged data, fills the data in it and provides the buffer + pointer in the MergedPcSampDataBuffers field. The user is expected to free the merged data buffers after use. +

+

+
+
+
+ CUptiUtilResult CUPTIUTILAPI CUPTI::PcSamplingUtil::CuptiUtilPutPcSampData ( CUptiUtil_PutPcSampDataParams* pParams )
+
+
Dump PC sampling data into the file.
+
+
Returns
+

+

    +
  • CUPTI_UTIL_SUCCESS +

    +
  • +
  • CUPTI_UTIL_ERROR_INVALID_PARAMETER +

    error out if buffer type is invalid or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details + not provided or filename is empty. +

    +
  • +
  • CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE +

    +
  • +
  • CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE +

    +
  • +
  • CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED +

    +
  • +
+

+
+
+
Description
+

This API can be called multiple times. It will append the buffer to the file. For every buffer it will store BufferInfo, which helps to allocate a buffer for the retrieved data before retrieving it. This API creates the file if it does + not exist. If the stallReasonIndex or stallReasons pointer of CUptiUtil_PutPcSampDataParams is NULL then stall reasons data will not be stored in the file. It is expected to store all available stall reason data at least + once to refer to it during offline correlation. +

+

+
+
+
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/notices-header.html b/doc/Cupti/notices-header.html new file mode 100644 index 0000000000000000000000000000000000000000..1b47d7f87457ec936ce332163f7a6549522e9cf5 --- /dev/null +++ b/doc/Cupti/notices-header.html @@ -0,0 +1,934 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

Notices

+
+

+
+
+

Notice

+

ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND + SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE + WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS + FOR A PARTICULAR PURPOSE. +

+

Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the + consequences of use of such information or for any infringement of patents or other rights of third parties that may result + from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications + mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information + previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems + without express written approval of NVIDIA Corporation. +

+
+
+
+
+

+
+
+

Trademarks

+

NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation + in the U.S. and other countries. Other company and product names may be trademarks of + the respective companies with which they are associated. +

+
+
+
+ +
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/r_library_support.html b/doc/Cupti/r_library_support.html new file mode 100644 index 0000000000000000000000000000000000000000..57025877be4bc3b4d7d09283c9a8c05f1e355b29 --- /dev/null +++ b/doc/Cupti/r_library_support.html @@ -0,0 +1,944 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

3. Library Support

+
+
+

+ CUPTI can be used to profile CUDA applications, as well as + applications that use CUDA via NVIDIA or third-party libraries. For most such libraries, + the behavior is expected to be identical to applications using CUDA directly. However, + for certain libraries, CUPTI has certain restrictions, or alternate behavior. +

+
+
+
+

3.1. OptiX

+
+
+

+ CUPTI supports profiling of OptiX applications, but with certain restrictions. + +

+

Tracing

+
    +
  • Internal Kernels

    Kernels launched by OptiX that contain no user-defined code are given the generic + name NVIDIA internal. CUPTI provides the tracing information for these kernels. +

    +
  • +
  • User Kernels

    Kernels launched by OptiX can contain user-defined code. OptiX identifies these + kernels with a custom name. This name starts with + raygen__ (for "ray generation"). These kernels can be traced. +

    +
  • +
+

Profiling

+

CUPTI can profile both internal and user kernels using the Profiling APIs. + In the auto range mode, range names will be numeric values starting from 0 + to total number of kernels including internal and user defined kernels or + maximum number of range set while calling set config API, whichever is minimum. + +

+

It is suggested to create the profiling session and enable the profiling at + resource allocation time (e.g. context creation) and disable the profiling at + the context destruction time. + +

+

Limitations

+
    +
  • +

    CUPTI doesn't issue any driver or runtime API callback for user kernels.

    +
  • +
  • +

    Event, Metric and PC sampling APIs are not supported for OptiX applications.

    +
  • +
+
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/r_main.html b/doc/Cupti/r_main.html new file mode 100644 index 0000000000000000000000000000000000000000..7a02391471349971774eb113887af81fa9604ae2 --- /dev/null +++ b/doc/Cupti/r_main.html @@ -0,0 +1,9393 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

2. Usage

+
+

2.1. CUPTI Compatibility and Requirements

+
+
+

New versions of the CUDA driver are backwards compatible with older + versions of CUPTI. For example, a developer using a profiling tool + based on CUPTI 10.0 can update to a more recently released CUDA driver. + Refer to the table + CUDA Toolkit and Compatible Driver Versions + for minimum version of the CUDA driver required for each release of + CUPTI from the corresponding CUDA Toolkit release. + CUPTI calls will fail with error code CUPTI_ERROR_NOT_INITIALIZED + if the CUDA driver version is not compatible with the CUPTI version. + +

+
+
+
+
+

2.2. CUPTI Initialization

+
+
+

CUPTI initialization occurs lazily the first time you + invoke any CUPTI function. For the Activity, Event, Metric, + and Callback APIs there are no requirements on when this + initialization must occur (i.e. you can invoke the first + CUPTI function at any point). See the CUPTI Activity API + section for more information on CUPTI initialization + requirements for the activity API. +

+

It is recommended for CUPTI clients to call the API cuptiSubscribe() + before starting the profiling session i.e. API cuptiSubscribe() should + be called before calling any other CUPTI API. This API will return the error code + CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED when another CUPTI client + is already subscribed. CUPTI client should error out and not make further CUPTI calls + if cuptiSubscribe() returns an error. This prevents multiple CUPTI + clients from being active at the same time; otherwise they might interfere with the + profiling state of each other. +

+
+
+
+
+

2.3. CUPTI Activity API

+
+
+

The CUPTI Activity API allows you to asynchronously collect a + trace of an application's CPU and GPU CUDA activity. The + following terminology is used by the activity API. +

+
+
Activity Record
+
CPU and GPU activity is reported in C data structures + called activity records. There is a different C structure + type for each activity kind (e.g. + CUpti_ActivityAPI). Records are generically + referred to using the CUpti_Activity type. This + type contains only a field that indicates the kind of + the activity record. Using this kind, the object can be cast + from the generic CUpti_Activity type to the + specific type representing the activity. See the + printActivity function in the activity_trace_async + sample for an example. +
+
Activity Buffer
+
An activity buffer is used to transfer one or more + activity records from CUPTI to the client. CUPTI fills + activity buffers with activity records as the corresponding + activities occur on the CPU and GPU. But CUPTI doesn't + guarantee any ordering of the activities in the activity buffer + as activity records for few activity kinds are added lazily. + The CUPTI client is responsible for providing empty activity + buffers as necessary to ensure that no records are dropped. +
+
+

An asynchronous buffering API is implemented by + cuptiActivityRegisterCallbacks and + cuptiActivityFlushAll. +

+

It is not required that the activity API be initialized before + CUDA initialization. All related activities occurring after initializing + the activity API are collected. You can force + initialization of the activity API by enabling one or more + activity kinds using cuptiActivityEnable or + cuptiActivityEnableContext, as shown in the + initTrace function of the activity_trace_async + sample. Some activity kinds cannot be directly enabled, see the + API documentation for CUpti_ActivityKind for + details. The functions cuptiActivityEnable and + cuptiActivityEnableContext will return + CUPTI_ERROR_NOT_COMPATIBLE if the requested activity + kind cannot be enabled. +

+
+
+ The activity buffer API uses callbacks to request and return + buffers of activity records. To use the asynchronous buffering + API, you must first register two callbacks using + cuptiActivityRegisterCallbacks. One of these callbacks + will be invoked whenever CUPTI needs an empty activity + buffer. The other callback is used to deliver a buffer + containing one or more activity records to the client. To + minimize profiling overhead the client should return as quickly + as possible from these callbacks. Client can pre-allocate a pool + of activity buffers and return an empty buffer from the pool when + requested by CUPTI. Activity buffer size should be chosen carefully, + smaller buffers can result in frequent requests by CUPTI and bigger + buffers can delay the automatic delivery of completed activity buffers. + For typical workloads, it's suggested to choose a size between 1 and 10 MB. + The functions cuptiActivityGetAttribute and + cuptiActivitySetAttribute can be used to read and write + attributes that control how the buffering API behaves. See the + API documentation for more information. + +

Flushing of the activity buffers

+ + CUPTI is expected to deliver the activity buffer automatically as soon as it gets full and all + the activity records in it are completed. For performance reasons, CUPTI calls the underlying + methods based on certain heuristics, thus it can cause delay in the delivery of the buffer. + However client can make a request to deliver the activity buffer/s at any time, and this can be + achieved using the APIs cuptiActivityFlushAll and cuptiActivityFlushPeriod. + Behavior of these APIs is as follows: + +
    +
  • For on-demand flush using the API cuptiActivityFlushAll with the flag set as 0, + CUPTI returns all the activity buffers which have all the activity records completed, buffers + need not to be full though. It doesn't return buffers which have one or more incomplete records. + This flush can be done at a regular interval in a separate thread. + +
  • +
  • For on-demand forced flush using the API cuptiActivityFlushAll with the flag set + as CUPTI_ACTIVITY_FLAG_FLUSH_FORCED, CUPTI returns all the activity buffers including the + ones which have one or more incomplete activity records. It's suggested to do the forced flush + before the termination of the profiling session to allow remaining + buffers to be delivered. + +
  • +
  • For periodic flush using the API cuptiActivityFlushPeriod, CUPTI returns only those + activity buffers which are full and have all the activity records completed. + It's allowed to use the API cuptiActivityFlushAll to flush the buffers on-demand, even + when client sets the periodic flush. +
  • +
+ + Note that activity record is considered as completed if it has all the information filled up + including the timestamps (if any). + +

+ The activity_trace_async + sample shows how to use the activity buffer API to collect a + trace of CPU and GPU activity for a simple application. + +

+
+
+

CUPTI Threads

+ + CUPTI creates a worker thread to minimize the perturbation for the application created threads. + CUPTI offloads certain operations from the application threads to the worker thread, + this includes synchronization of profiling resources between host and device, delivery of the + activity buffers to the client using the buffer completed callback registered in the API + cuptiActivityRegisterCallbacks etc. + To minimize the overhead, CUPTI wakes up the worker thread based on certain heuristics. + API cuptiActivityFlushPeriod introduced in CUDA 11.1 can be used to control the flush + period of the worker thread. This setting overrides the CUPTI heuristics. It's allowed to use + the API cuptiActivityFlushAll to flush the data on-demand, even when client sets the + periodic flush. + +

Further, CUPTI creates separate threads when certain activity kinds are enabled. + For example, CUPTI creates one thread each for activity kinds CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER + and CUPTI_ACTIVITY_KIND_ENVIRONMENT to collect the information from the backend. +

+
+
+
+

2.3.1. SASS Source Correlation

+
+
+ While high-level languages for GPU programming like CUDA C offer a useful level + of abstraction, convenience, and maintainability, they inherently hide some of the + details of the execution on the hardware. It is sometimes helpful to analyze + performance problems for a kernel at the assembly instruction level. Reading + assembly language is tedious and challenging; CUPTI can help you to build the + correlation between lines in your high-level source code and the executed assembly + instructions. + +
+
+ Building SASS source correlation for a PC can be split into two parts: + +
    +
  • Correlation of the PC to SASS instruction - subscribe to any one of the + CUPTI_CBID_RESOURCE_MODULE_LOADED, CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING, + or CUPTI_CBID_RESOURCE_MODULE_PROFILED callbacks. This returns a + CUpti_ModuleResourceData structure having the CUDA binary. + The binary can be disassembled using the nvdisasm utility that comes with the CUDA toolkit. + An application can have multiple functions and modules, to uniquely identify there + is a functionId field in all source level activity records. This uniquely + corresponds to a CUPTI_ACTIVITY_KIND_FUNCTION, which has the unique module ID and + function ID in the module. +
  • +
  • Correlation of the SASS instruction to CUDA source line - every source level activity + has a sourceLocatorId field which uniquely maps to a record of kind + CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR, containing the line and file name information. + Please note that multiple PCs can correspond to a single source line. +
  • +
+

When any source level activity (global access, branch, PC Sampling, etc.) is enabled, + a source locator record is generated for the PCs that have the source level results. + The record CUpti_ActivityInstructionCorrelation can be used, along with source + level activities, to generate SASS assembly instructions to CUDA C source code mapping + for all the PCs of the function, and not just the PCs that have the source level results. + This can be enabled using the activity kind CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION. + +

+

The sass_source_map + sample shows how to map SASS assembly instructions to CUDA C source. + +

+
+
+
+
+

2.3.2. PC Sampling

+
+
+

+ CUPTI supports device-wide sampling of the program counter (PC). + The PC Sampling gives the number of samples for each source and assembly line with + various stall reasons. Using this information, you can pinpoint portions of your kernel + that are introducing latencies and the reason for the latency. Samples are taken in + round robin order for all active warps at a fixed number of cycles, regardless of whether + the warp is issuing an instruction or not. + +

+

+ Devices with compute capability 6.0 and higher have a new feature that gives + latency reasons. The latency samples indicate the reasons for holes in the + issue pipeline. While collecting these samples, there is no instruction issued + in the respective warp scheduler, hence these give the latency reasons. + The latency reasons will be one of the stall reasons listed in the enum + CUpti_ActivityPCSamplingStallReason, except stall reason + CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED. + +

+

+ The activity record CUpti_ActivityPCSampling3, enabled using activity kind + CUPTI_ACTIVITY_KIND_PC_SAMPLING, outputs the stall reason along with PC and other related + information. The enum CUpti_ActivityPCSamplingStallReason lists all the stall reasons. + Sampling period is configurable and can be tuned using API cuptiActivityConfigurePCSampling. + A wide range of sampling periods, ranging from 2^5 cycles to 2^31 cycles per sample, is supported. + This can be controlled through the field samplingPeriod2 in the PC sampling configuration struct + CUpti_ActivityPCSamplingConfig. + The activity record CUpti_ActivityPCSamplingRecordInfo provides the total and dropped + samples for each kernel profiled for PC sampling. + +

+

+ This feature is available on devices with compute capability 5.2 and higher, excluding + mobile devices. + For Pascal and older chips the cuptiActivityConfigurePCSampling API must be called before enabling + activity kind CUPTI_ACTIVITY_KIND_PC_SAMPLING; for Volta and newer chips the order does not matter. + For Volta and newer GPU architectures if cuptiActivityConfigurePCSampling API is called in the + middle of execution, PC sampling configuration will be updated for subsequent kernel launches. + PC sampling can significantly change the overall performance characteristics of the application because + all kernel executions are serialized on the GPU. + +

+

+ The pc_sampling + sample shows how to use these APIs to collect PC Sampling profiling + information for a kernel. + +

+
Note: + A new set of PC Sampling APIs was introduced in the CUDA 11.3 release, which supports continuous mode data + collection without serializing kernel execution and has a lower runtime overhead. Refer to the section + CUPTI PC Sampling API for more details. + PC Sampling APIs from the header cupti_activity.h are referred to as PC Sampling Activity APIs + and APIs from the header cupti_pcsampling.h are referred to as PC Sampling APIs. + +
+
+
+
+ +
+

2.3.4. OpenACC

+
+
+ CUPTI supports collecting information for OpenACC applications + using the OpenACC tools interface implementation of the PGI runtime. + OpenACC profiling is available only on Linux x86_64, IBM POWER and Arm server + (arm64 SBSA) platforms. This feature also requires PGI runtime version 19.1 or higher. + +
+
+ The activity records CUpti_ActivityOpenAccData, CUpti_ActivityOpenAccLaunch, + and CUpti_ActivityOpenAccOther are created, representing the three groups of + callback events specified in the OpenACC tools interface. + CUPTI_ACTIVITY_KIND_OPENACC_DATA, CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH, + and CUPTI_ACTIVITY_KIND_OPENACC_OTHER can be enabled to collect the respective + activity records. + +
+
+ Due to the restrictions of the OpenACC tools interface, CUPTI cannot record OpenACC + records from within the client application. Instead, a shared library that + exports the acc_register_library function defined in the OpenACC + tools interface specification must be implemented. Parameters passed into this + function from the OpenACC runtime can be used to initialize the CUPTI OpenACC + measurement using cuptiOpenACCInitialize. Before starting the client + application, the environment variable ACC_PROFLIB must be set to + point to this shared library. + +
+
cuptiOpenACCInitialize is defined in cupti_openacc.h, which is included + by cupti_activity.h. Since the CUPTI OpenACC header is only available on supported + platforms, CUPTI clients must define CUPTI_OPENACC_SUPPORT when compiling. + +
+
+ The openacc_trace + sample shows how to use CUPTI APIs for OpenACC data collection. + +
+
+
+
+

2.3.5. CUDA Graphs

+
+
+

CUPTI can collect trace of CUDA Graphs applications without + breaking driver performance optimizations. CUPTI has added fields graphId and graphNodeId + in the kernel, memcpy and memset activity records to denote the unique ID of the graph and the graph node respectively of + the GPU activity. + CUPTI issues callbacks for graph operations like graph and graph node creation/destruction/cloning and also for executable + graph creation/destruction. + The cuda_graphs_trace sample shows how to collect GPU trace and API trace for CUDA Graphs and how to correlate a graph node launch to the node + creation API by using CUPTI callbacks for graph operations. + +

+
+
+
+
+

2.3.6. External Correlation

+
+
+ CUPTI supports correlation of CUDA API activity records + with external APIs. Such APIs include OpenACC, OpenMP, and MPI. + This associates CUPTI correlation IDs with IDs provided by the + external API. Both IDs are stored in a new activity record of type + CUpti_ActivityExternalCorrelation. + +
+
+ CUPTI maintains a stack of external correlation IDs per CPU thread and per + CUpti_ExternalCorrelationKind. Clients must use + cuptiActivityPushExternalCorrelationId to push an external ID of + a specific kind to this stack and cuptiActivityPopExternalCorrelationId + to remove the latest ID. If a CUDA API activity record is generated while + any CUpti_ExternalCorrelationKind-stack on the same CPU thread is + non-empty, one CUpti_ActivityExternalCorrelation record per + CUpti_ExternalCorrelationKind-stack is inserted into the activity + buffer before the respective CUDA API activity record. + The CUPTI client is responsible for tracking passed external API + correlation IDs, in order to eventually associate external API calls with + CUDA API calls. Along with the activity kind CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION, + it is necessary to enable the CUDA API activity kinds i.e. + CUPTI_ACTIVITY_KIND_RUNTIME and CUPTI_ACTIVITY_KIND_DRIVER + to generate external correlation activity records. + +
+
+ If both CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION and any of + CUPTI_ACTIVITY_KIND_OPENACC_* activity kinds are enabled, + CUPTI will generate external correlation activity records for OpenACC + with externalKind CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. + +
+
+ The cupti_external_correlation + sample shows how to use CUPTI APIs for external correlation. + +
+
+
+
+

2.3.7. Dynamic Attach and Detach

+
+
+ CUPTI provides mechanisms for attaching to or detaching from a running process to support + on-demand profiling. + CUPTI can be attached by calling any CUPTI API as CUPTI supports lazy initialization. + To detach CUPTI, call the API cuptiFinalize() which destroys and cleans up all the + resources associated with CUPTI in the current process. After CUPTI detaches from the process, + the process will keep on running with no CUPTI attached to it. Any subsequent CUPTI API call + will reinitialize CUPTI. You can attach and detach CUPTI any number of times. + For safe operation of the API, it is recommended that the API cuptiFinalize() is invoked + from the exit call site of any of the CUDA Driver or Runtime APIs. + Otherwise, the CUPTI client needs to make sure that CUDA synchronization and CUPTI activity + buffer flush are done before calling the API cuptiFinalize(). + To understand the need for calling the API cuptiFinalize() from specific points in the code flow, + consider multiple application threads performing various CUDA activities. + While one thread is in the middle of cuptiFinalize(), it is quite possible that other + threads continue to call into CUPTI and try to access the state of various objects + (device, context, thread state, etc.) maintained by CUPTI, + which might be rendered invalid as part of cuptiFinalize(), thus resulting in a crash. + We have to block the other threads until CUPTI teardown is completed via cuptiFinalize(). + The API exit call site is one such location where we can ensure that the work submitted by all the threads + has been completed and we can safely tear down CUPTI. + cuptiFinalize() is a heavy operation as it does context synchronization for all active CUDA contexts + and blocks all the application threads until CUPTI teardown is completed. + Sample code showing the usage of the API cuptiFinalize() in the CUPTI callback handler code: +
+void CUPTIAPI
+cuptiCallbackHandler(void *userdata, CUpti_CallbackDomain domain,
+    CUpti_CallbackId cbid, void *cbdata)
+{
+    const CUpti_CallbackData *cbInfo = (CUpti_CallbackData *)cbdata;
+
+    // Take this code path when CUPTI detach is requested
+    if (detachCupti) {
+        switch(domain)
+        {
+        case CUPTI_CB_DOMAIN_RUNTIME_API:
+        case CUPTI_CB_DOMAIN_DRIVER_API:
+            if (cbInfo->callbackSite == CUPTI_API_EXIT) {
+                // call the CUPTI detach API
+                cuptiFinalize();
+            }
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+ Full code can be found in the sample + cupti_finalize. + +
+
+
+
+
+

2.4. CUPTI Callback API

+
+
+

The CUPTI Callback API allows you to register a callback into your own + code. Your callback will be invoked when the application being + profiled calls a CUDA runtime or driver function, or when certain + events occur in the CUDA driver. The following terminology is used by + the callback API. +

+
+
Callback Domain
+
Callbacks are grouped into domains to make it + easier to associate your callback functions with groups of related + CUDA functions or events. There are currently four callback domains, + as defined by CUpti_CallbackDomain: a domain for CUDA + runtime functions, a domain for CUDA driver functions, a domain for + CUDA resource tracking, and a domain for CUDA synchronization + notification. +
+
Callback ID
+
Each callback is given a unique ID within the + corresponding callback domain so that you can identify it within + your callback function. The CUDA driver API IDs are defined in + cupti_driver_cbid.h and the CUDA runtime API IDs are + defined in cupti_runtime_cbid.h. Both of these headers + are included for you when you include cupti.h. The CUDA + resource callback IDs are defined by + CUpti_CallbackIdResource, and the CUDA synchronization + callback IDs are defined by CUpti_CallbackIdSync. +
+
Callback Function
+
Your callback function must be of type + CUpti_CallbackFunc. This function type has two arguments + that specify the callback domain and ID so that you know why the + callback is occurring. The type also has a cbdata argument + that is used to pass data specific to the callback. +
+
Subscriber
+
A subscriber is used to associate each of your + callback functions with one or more CUDA API functions. There can be + at most one subscriber initialized with cuptiSubscribe() at any + time. Before initializing a new subscriber, the existing subscriber + must be finalized with cuptiUnsubscribe(). +
+
+

+ Each callback domain is described in detail below. Unless explicitly + stated, it is not supported to call any CUDA runtime or driver API + from within a callback function. Doing so may cause the application to + hang. + +

+
+
+
+

2.4.1. Driver and Runtime API Callbacks

+
+
+

Using the callback API with the + CUPTI_CB_DOMAIN_DRIVER_API or + CUPTI_CB_DOMAIN_RUNTIME_API domains, you can associate a + callback function with one or more CUDA API functions. When those CUDA + functions are invoked in the application, your callback function is + invoked as well. For these domains, the cbdata argument to + your callback function will be of the type + CUpti_CallbackData. +

+

It is legal to call cudaThreadSynchronize(), + cudaDeviceSynchronize(), cudaStreamSynchronize(), + cuCtxSynchronize(), and cuStreamSynchronize() from + within a driver or runtime API callback function. + +

+

The following code shows a typical sequence used to associate a + callback function with one or more CUDA API functions. To simplify the + presentation, error checking code has been removed. +

  CUpti_SubscriberHandle subscriber;
+  MyDataStruct *my_data = ...;
+  ...
+  cuptiSubscribe(&subscriber, 
+                 (CUpti_CallbackFunc)my_callback , my_data);
+  cuptiEnableDomain(1, subscriber, 
+                    CUPTI_CB_DOMAIN_RUNTIME_API);

+ First, cuptiSubscribe is used to initialize a subscriber with + the my_callback callback function. Next, + cuptiEnableDomain is used to associate that callback with all + the CUDA runtime API functions. Using this code sequence will cause + my_callback to be called twice each time any of the CUDA + runtime API functions are invoked, once on entry to the CUDA function + and once just before exit from the CUDA function. CUPTI callback API + functions cuptiEnableCallback and + cuptiEnableAllDomains can also be used to associate CUDA API + functions with a callback (see reference below for more information). + +

+

The following code shows a typical callback function.

void CUPTIAPI
+my_callback(void *userdata, CUpti_CallbackDomain domain,
+            CUpti_CallbackId cbid, const void *cbdata)
+{
+  const CUpti_CallbackData *cbInfo = (CUpti_CallbackData *)cbdata;
+  MyDataStruct *my_data = (MyDataStruct *)userdata;
+      
+  if ((domain == CUPTI_CB_DOMAIN_RUNTIME_API) &&
+      (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020))  { 
+    if (cbInfo->callbackSite == CUPTI_API_ENTER) {
+        cudaMemcpy_v3020_params *funcParams = 
+             (cudaMemcpy_v3020_params *)(cbInfo->
+                 functionParams);
+
+        size_t count = funcParams->count;
+        enum cudaMemcpyKind kind = funcParams->kind;
+        ...
+      }
+  ...

+ In your callback function, you use the CUpti_CallbackDomain + and CUpti_CallbackId parameters to determine which CUDA API + function invocation is causing this callback. In the example above, we + are checking for the CUDA runtime cudaMemcpy function. The + cbdata parameter holds a structure of useful + information that can be used within the callback. In this case, we use + the callbackSite member of the structure to detect that the + callback is occurring on entry to cudaMemcpy, and we use the + functionParams member to access the parameters that were + passed to cudaMemcpy. To access the parameters, we first cast + functionParams to a structure type corresponding to the + cudaMemcpy function. These parameter structures are contained + in generated_cuda_runtime_api_meta.h, + generated_cuda_meta.h, and a number of other files. When + possible, these files are included for you by cupti.h. + +

+

+ The callback_event and callback_timestamp samples + described on the samples page both show how to use the callback + API for the driver and runtime API domains. + +

+
+
+
+
+

2.4.2. Resource Callbacks

+
+
+

Using the callback API with the CUPTI_CB_DOMAIN_RESOURCE + domain, you can associate a callback function with some CUDA resource + creation and destruction events. For example, when a CUDA context is + created, your callback function will be invoked with a callback ID + equal to CUPTI_CBID_RESOURCE_CONTEXT_CREATED. For this + domain, the cbdata argument to your callback function will be + of the type CUpti_ResourceData. +

+

Note that APIs cuptiActivityFlush and cuptiActivityFlushAll + will result in deadlock when called from stream destroy starting callback + identified using callback ID CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING. +

+
+
+
+
+

2.4.3. Synchronization Callbacks

+
+
+

Using the callback API with the + CUPTI_CB_DOMAIN_SYNCHRONIZE domain, you can associate a + callback function with CUDA context and stream synchronizations. For + example, when a CUDA context is synchronized, your callback function + will be invoked with a callback ID equal to + CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED. For this + domain, the cbdata argument to your callback function will be + of the type CUpti_SynchronizeData. +

+
+
+
+
+

2.4.4. NVIDIA Tools Extension Callbacks

+
+
+

Using the callback API with the + CUPTI_CB_DOMAIN_NVTX domain, you can associate a + callback function with NVIDIA Tools Extension (NVTX) API + functions. When an NVTX function is invoked in the + application, your callback function is invoked as + well. For this domain, the cbdata argument to + your callback function will be of the type + CUpti_NvtxData. + +

+
+ The NVTX library has its own convention for discovering the + profiling library that will provide the implementation of the + NVTX callbacks. To receive callbacks, you must set the NVTX + environment variables appropriately so that when the + application calls an NVTX function, your profiling library + receives the callbacks. The following code sequence shows a + typical initialization sequence to enable NVTX callbacks and + activity records. +
/* Set env so CUPTI-based profiling library loads on first nvtx call. */
+char *inj32_path = "/path/to/32-bit/version/of/cupti/based/profiling/library";
+char *inj64_path = "/path/to/64-bit/version/of/cupti/based/profiling/library";
+setenv("NVTX_INJECTION32_PATH", inj32_path, 1);
+setenv("NVTX_INJECTION64_PATH", inj64_path, 1);
+

The following code shows a typical sequence used to associate a + callback function with one or more NVTX functions. To simplify the + presentation, error checking code has been removed. +

CUpti_SubscriberHandle subscriber;
+MyDataStruct *my_data = ...;
+...
+cuptiSubscribe(&subscriber,
+               (CUpti_CallbackFunc)my_callback , my_data);
+cuptiEnableDomain(1, subscriber,
+                  CUPTI_CB_DOMAIN_NVTX);

+ First, cuptiSubscribe is used to initialize a subscriber with + the my_callback callback function. Next, + cuptiEnableDomain is used to associate that callback + with all the NVTX functions. Using this code sequence will + cause my_callback to be called once each time any of + the NVTX functions are invoked. CUPTI callback API + functions cuptiEnableCallback and + cuptiEnableAllDomains can also be used to associate NVTX API + functions with a callback (see reference below for more information). + +

+

The following code shows a typical callback function.

void CUPTIAPI
+my_callback(void *userdata, CUpti_CallbackDomain domain,
+            CUpti_CallbackId cbid, const void *cbdata)
+{
+  const CUpti_NvtxData *nvtxInfo = (CUpti_NvtxData *)cbdata;
+  MyDataStruct *my_data = (MyDataStruct *)userdata;
+
+  if ((domain == CUPTI_CB_DOMAIN_NVTX) &&
+      (cbid == CUPTI_CBID_NVTX_nvtxRangeStartEx))  {
+    nvtxRangeStartEx_params *params = (nvtxRangeStartEx_params *)nvtxInfo->
+             functionParams;
+    nvtxRangeId_t *id = (nvtxRangeId_t *)nvtxInfo->functionReturnValue;
+    ...
+  }
+  ...

+ In your callback function, you use the CUpti_CallbackDomain + and CUpti_CallbackId parameters to determine which NVTX API + function invocation is causing this callback. In the example above, we + are checking for the nvtxRangeStartEx function. The + cbdata parameter holds a structure of useful + information that can be used within the callback. In this + case, we use the functionParams member to access the + parameters that were passed to nvtxRangeStartEx. To access + the parameters, we first cast + functionParams to a structure type corresponding to the + nvtxRangeStartEx function. These parameter + structures are contained in generated_nvtx_meta.h. We also use + the functionReturnValue member to access the value returned + by nvtxRangeStartEx. To access the return value, we first cast + functionReturnValue to the return type corresponding to the + nvtxRangeStartEx function. If there is no return value for the + NVTX function, functionReturnValue is NULL. + +

+

+ The sample cupti_nvtx + shows the initialization sequence to enable NVTX callbacks and activity records. + +

+

+ If your CUPTI-based profiling library links the static CUPTI library, you can define and + export your own InitializeInjectionNvtx and InitializeInjectionNvtx2 functions, + which NVTX calls when the NVTX environment variables are set. + +

+

+ If you want CUPTI to handle NVTX calls, these functions should call CUPTI's + corresponding initialization functions, as shown in the example below, so that when the + application calls an NVTX function, your profiling library receives the callbacks. + The following code sequence shows how this can be done to receive callbacks and + activity records when linking the static CUPTI library. + +

/* Set env so CUPTI-based profiling library loads on first nvtx call. */
+char *inj32_path = "/path/to/32-bit/version/of/cupti/based/profiling/library";
+char *inj64_path = "/path/to/64-bit/version/of/cupti/based/profiling/library";
+setenv("NVTX_INJECTION32_PATH", inj32_path, 1);
+setenv("NVTX_INJECTION64_PATH", inj64_path, 1);
+
+/* Extern the CUPTI NVTX initialization APIs. The APIs are thread-safe */
+extern "C" CUptiResult CUPTIAPI cuptiNvtxInitialize(void* pfnGetExportTable);
+extern "C" CUptiResult CUPTIAPI cuptiNvtxInitialize2(void* pfnGetExportTable);
+
+extern "C" int InitializeInjectionNvtx(void* p)
+{
+  CUptiResult res = cuptiNvtxInitialize(p);
+  return (res == CUPTI_SUCCESS) ? 1 : 0;
+}
+
+extern "C" int InitializeInjectionNvtx2(void* p)
+{
+  CUptiResult res = cuptiNvtxInitialize2(p);
+  return (res == CUPTI_SUCCESS) ? 1 : 0;
+}

+ Alternatively, if you want to handle NVTX calls directly in your profiling library, + you can attach your own callbacks to the NVTX client in these functions. + +

+

+ NVTX v1 and v2 both have the initialization code in a single injection library + shared by all users of NVTX in the whole process, so the initialization will + happen only once per process. + NVTX v3 embeds the initialization code into your own binaries, + so if NVTX v3 is in multiple dynamic libraries, each one of those sites + will initialize the first time an NVTX call is made from that dynamic library. + These first calls could be on different threads. + So if you are wiring up your own NVTX handlers, you should ensure that code is + thread-safe when called from multiple threads at once. + +

+
+
+
+
+
+

2.5. CUPTI Event API

+
+
+

The CUPTI Event API allows you to query, configure, start, stop, and + read the event counters on a CUDA-enabled device. The following + terminology is used by the event API. +

+
+
Event
+
An event is a countable activity, action, or occurrence + on a device. +
+
Event ID
+
Each event is assigned a unique identifier. A named + event will represent the same activity, action, or occurrence on all + device types. But the named event may have different IDs on + different device families. Use cuptiEventGetIdFromName to + get the ID for a named event on a particular device. +
+
Event Category
+
Each event is placed in one of the categories + defined by CUpti_EventCategory. The category indicates the + general type of activity, action, or occurrence measured by the + event. +
+
Event Domain
+
A device exposes one or more event domains. Each + event domain represents a group of related events available on that + device. A device may have multiple instances of a domain, indicating + that the device can simultaneously record multiple instances of each + event within that domain. +
+
Event Group
+
An event group is a collection of events that are + managed together. The number and type of events that can be added to + an event group are subject to device-specific limits. At any given + time, a device may be configured to count events from a limited + number of event groups. All events in an event group must belong to + the same event domain. +
+
Event Group Set
+
An event group set is a collection of event + groups that can be enabled at the same time. Event group sets are + created by cuptiEventGroupSetsCreate and + cuptiMetricCreateEventGroupSets. +
+
+

+ You can determine the events available on a device using the + cuptiDeviceEnumEventDomains and + cuptiEventDomainEnumEvents functions. The + cupti_query sample described on the samples page shows + how to use these functions. You can also enumerate all the CUPTI + events available on any device using the + cuptiEnumEventDomains function. + +

+

+ Configuring and reading event counts requires the following steps. + First, select your event collection mode. If you want to count events + that occur during the execution of a kernel, use + cuptiSetEventCollectionMode to set mode + CUPTI_EVENT_COLLECTION_MODE_KERNEL. If you want to + continuously sample the event counts, use mode + CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS. Next, determine + the names of the events that you want to count, and then use the + cuptiEventGroupCreate, cuptiEventGetIdFromName, and + cuptiEventGroupAddEvent functions to create and initialize an + event group with those events. If you are unable to add all the events + to a single event group, then you will need to create multiple event + groups. Alternatively, you can use the + cuptiEventGroupSetsCreate function to automatically create + the event group(s) required for a set of events. + +

+
+ It's possible that all the requested events can't be collected in a single pass due + to hardware or software limitations; in that case, one needs to replay the exact same set of GPU + workloads multiple times. + The number of passes can be queried using the API cuptiEventGroupSetsCreate. + Profiling one event always takes a single pass. Multiple passes might be required when we + want to profile multiple events together. Code snippet showing how to query the number of passes: +
+CUpti_EventGroupSets *eventGroupSets = NULL;
+size_t eventIdArraySize = sizeof(CUpti_EventID) * numEvents;
+CUpti_EventID *eventIdArray = (CUpti_EventID *)malloc(sizeof(CUpti_EventID) * numEvents);
+// fill in event Ids
+cuptiEventGroupSetsCreate(context, eventIdArraySize, eventIdArray, &eventGroupSets);
+// number of passes required to collect all the events
+passes = eventGroupSets->numSets;
+
+

+ To begin counting a set of events, enable the event group + or groups that contain those events by using the + cuptiEventGroupEnable function. If your events + are contained in multiple event groups, you may be unable + to enable all of the event groups at the same time i.e. in + the same pass. In this case, you can gather the + events across multiple executions of the application or + you can enable kernel replay. If you enable kernel replay + using cuptiEnableKernelReplayMode, you will be + able to enable any number of event groups and all the + contained events will be collected. + +

+

+ Use the cuptiEventGroupReadEvent and/or + cuptiEventGroupReadAllEvents functions to read + the event values. When you are done collecting events, use + the cuptiEventGroupDisable function to stop + counting the events contained in an event group. The + callback_event sample described on the samples page shows how to use + these functions to create, enable, and disable event + groups, and how to read event counts. + +

+
Note: + For event collection mode CUPTI_EVENT_COLLECTION_MODE_KERNEL, + event or metric collection may significantly change the overall + performance characteristics of the application because all kernel + executions that occur between the cuptiEventGroupEnable and + cuptiEventGroupDisable calls are serialized on the GPU. + This can be avoided by using mode + CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS, and restricting profiling + to events and metrics that can be collected in a single pass. + +
+
Note: + All the events and metrics except NVLink metrics are collected at the + context level, irrespective of the event collection mode. That is, + events or metrics can be attributed to the context being profiled and + values can be accurately collected, when multiple contexts are executing + on the GPU. NVLink metrics are collected at device level for all event + collection modes. + +
+

+ In a system with multiple GPUs, events can be collected simultaneously + on all the GPUs; in other words, event profiling doesn't enforce any serialization + of work across GPUs. The + event_multi_gpu sample shows how to use the CUPTI event and CUDA APIs + on such setups. + +

+
Note: + Event APIs from the header cupti_events.h are not supported for devices with + compute capability 7.5 and higher. It is advised to use the + CUPTI Profiling API instead. Refer to the section + Migration to the Profiling API. + +
+
+
+
+

2.5.1. Collecting Kernel Execution Events

+
+
+

A common use of the event API is to count a set of events during the + execution of a kernel (as demonstrated by the callback_event + sample). The following code shows a typical callback used for this + purpose. Assume that the callback was enabled only for a kernel launch + using the CUDA runtime (i.e., by cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RUNTIME_API, + CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020)). To simplify the + presentation, error checking code has been removed. +

static void CUPTIAPI
+getEventValueCallback(void *userdata,
+                      CUpti_CallbackDomain domain,
+                      CUpti_CallbackId cbid,
+                      const void *cbdata)
+{
+  /* Callback that counts events for a single kernel launch: event
+   * collection is enabled when the launch API is entered, and the
+   * event value is read (and collection disabled) when it exits.
+   * eventGroup, eventId, bytesRead and eventVal are not declared in
+   * this snippet; they are assumed to be set up elsewhere by the
+   * application. Error checking is omitted for brevity. */
+  const CUpti_CallbackData *cbData =
+                (const CUpti_CallbackData *)cbdata;
+
+  if (cbData->callbackSite == CUPTI_API_ENTER) {
+    /* Wait until the GPU is idle so earlier work is not counted. */
+    cudaDeviceSynchronize();
+    /* Kernel mode: counters are started and stopped just before and
+     * after the kernel executes. */
+    cuptiSetEventCollectionMode(cbData->context,
+                                CUPTI_EVENT_COLLECTION_MODE_KERNEL);
+    cuptiEventGroupEnable(eventGroup);
+  }
+
+  if (cbData->callbackSite == CUPTI_API_EXIT) {
+    /* Wait for the queued kernel to finish before reading the counts. */
+    cudaDeviceSynchronize();
+    cuptiEventGroupReadEvent(eventGroup,
+                             CUPTI_EVENT_READ_FLAG_NONE,
+                             eventId,
+                             &bytesRead, &eventVal);
+
+    cuptiEventGroupDisable(eventGroup);
+  }
+}

+ Two synchronization points are used to ensure that events are counted + only for the execution of the kernel. If the application contains + other threads that launch kernels, then additional thread-level + synchronization must also be introduced to ensure that those threads + do not launch kernels while the callback is collecting events. When + the cudaLaunch API is entered (that is, before the kernel is actually + launched on the device), cudaDeviceSynchronize is used to + wait until the GPU is idle. The event collection mode is set to + CUPTI_EVENT_COLLECTION_MODE_KERNEL so that the event + counters are automatically started and stopped just before and after + the kernel executes. Then event collection is enabled with + cuptiEventGroupEnable. + +

+

+ When the cudaLaunch API is exited (that is, after the kernel is queued + for execution on the GPU) another cudaDeviceSynchronize is + used to cause the CPU thread to wait for the kernel to finish + execution. Finally, the event counts are read with + cuptiEventGroupReadEvent. + +

+
+
+
+
+

2.5.2. Sampling Events

+
+
+

The event API can also be used to sample event values while a kernel + or kernels are executing (as demonstrated by the + event_sampling sample). The sample shows one possible way to + perform the sampling. The event collection mode is set to + CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS so that the event + counters run continuously. Two threads are used in + event_sampling: one thread schedules the kernels and memcpys + that perform the computation, while another thread wakes up periodically + to sample an event counter. In this sample, there is no correlation of + the event samples with what is happening on the GPU. To get some + coarse correlation, you can use cuptiDeviceGetTimestamp to + collect the GPU timestamp at the time of the sample and also at other + interesting points in your application. +

+
+
+
+
+
+

2.6. CUPTI Metric API

+
+
+

The CUPTI Metric API allows you to collect application metrics + calculated from one or more event values. The following terminology + is used by the metric API. +

+
+
Metric
+
A characteristic of an application that is calculated + from one or more event values. +
+
Metric ID
+
Each metric is assigned a unique identifier. A named + metric will represent the same characteristic on all device + types. But the named metric may have different IDs on different + device families. Use cuptiMetricGetIdFromName to get the ID + for a named metric on a particular device. +
+
Metric Category
+
Each metric is placed in one of the categories + defined by CUpti_MetricCategory. The category indicates + the general type of the characteristic measured by the metric. +
+
Metric Property
+
Each metric is calculated from input values. These + input values can be events or properties of the device + or system. The available properties are defined by + CUpti_MetricPropertyID. +
+
Metric Value
+
Each metric has a value that represents one of + the kinds defined by CUpti_MetricValueKind. For each value + kind, there is a corresponding member of the + CUpti_MetricValue union that is used to hold the metric's + value. +
+
+
+
+

+ The tables included in this section list the metrics available for + each device, as determined by the device's compute capability. You + can also determine the metrics available on a device using the + cuptiDeviceEnumMetrics function. The cupti_query + sample described on the samples page shows how to use this + function. You can also enumerate all the CUPTI metrics available on + any device using the cuptiEnumMetrics function. + +

+

+ CUPTI provides two functions for calculating a metric + value. cuptiMetricGetValue2 can be used to + calculate a metric value when the device is not + available. All required event values and metric properties + must be provided by the + caller. cuptiMetricGetValue can be used to + calculate a metric value when the device is available (as a + CUdevice object). All required event values must be + provided by the caller, but CUPTI will determine the + appropriate property values from the CUdevice object. + +

+

+ Configuring and calculating metric values requires the + following steps. First, determine the name of the metric + that you want to collect, and then use the + cuptiMetricGetIdFromName to get the metric + ID. Use cuptiMetricEnumEvents to get the events + required to calculate the metric, and follow instructions + in the CUPTI Event API section to create the event groups + for those events. When creating event groups in this + manner, it is important to use the result of + cuptiMetricGetRequiredEventGroupSets to properly + group together events that must be collected in the same + pass to ensure proper metric calculation. + +

+

+ Alternatively, you can use the + cuptiMetricCreateEventGroupSets function to + automatically create the event group(s) required for + metrics' events. When using this function, events will be + grouped as required to most accurately calculate the + metric; as a result, it is not necessary to use + cuptiMetricGetRequiredEventGroupSets. + +

+

+ If you are using cuptiMetricGetValue2, then you must + also collect the required metric property values using + cuptiMetricEnumProperties. + +

+

+ Collect event counts as described in the CUPTI Event API + section, and then use either cuptiMetricGetValue + or cuptiMetricGetValue2 to calculate the metric + value from the collected event and property values. The + callback_metric sample described on the samples page shows how to use + the functions to calculate event values and calculate a + metric using cuptiMetricGetValue. Note that as + shown in the example, you should collect event counts from + all domain instances, and normalize the counts to get the + most accurate metric values. It is necessary to normalize + the event counts because the number of event counter + instances varies by device and by the event being counted. + +

+

+ For example, a device might have 8 multiprocessors but + only have event counters for 4 of the multiprocessors, and + might have 3 memory units and only have event counters + for one memory unit. When calculating a metric that + requires a multiprocessor event and a memory unit event, + the 4 multiprocessor counters should be summed and + multiplied by 2 to normalize the event count across the + entire device. Similarly, the one memory unit counter + should be multiplied by 3 to normalize the event count + across the entire device. The normalized values can then + be passed to cuptiMetricGetValue or + cuptiMetricGetValue2 to calculate the metric + value. + +

+

+ As described, the normalization assumes the kernel executes a + sufficient number of blocks to completely load the device. If the + kernel has only a small number of blocks, normalizing across the + entire device may skew the result. + +

+
+ It is possible that not all of the requested metrics can be collected in a single pass due + to hardware or software limitations; in that case, the exact same set of GPU + workloads must be replayed multiple times. + The number of passes can be queried using the API cuptiMetricCreateEventGroupSets. + Profiling a single metric can also take multiple passes, depending on the number and + type of events it is calculated from. The following code snippet shows how to query the number of passes: +
+// Receives the event group sets created for the requested metrics.
+CUpti_EventGroupSets *eventGroupSets = NULL;
+// Size of the metric ID array in bytes.
+size_t metricIdArraySize = sizeof(CUpti_MetricID) * numMetrics;
+// The array must be held through a pointer: malloc returns a pointer.
+CUpti_MetricID *metricIdArray = (CUpti_MetricID *)malloc(metricIdArraySize);
+// fill in metric Ids
+cuptiMetricCreateEventGroupSets(context, metricIdArraySize, metricIdArray, &eventGroupSets);
+// number of passes required to collect all the metrics
+passes = eventGroupSets->numSets;
+
+
Note: + Metric APIs from the header cupti_metrics.h are not supported for devices with + compute capability 7.5 and higher. It is advised to use the + CUPTI Profiling API instead. Refer to the section + Migration to the Profiling API. + +
+
+
+
+

2.6.1. Metrics Reference

+
+

This section contains detailed descriptions of the metrics that can be + collected by CUPTI. A scope value of "Single-context" indicates that the + metric can only be accurately collected when a single context (CUDA or graphics) is + executing on the GPU. A scope value of "Multi-context" indicates that the + metric can be accurately collected when multiple contexts are executing on + the GPU. A scope value of "Device" indicates that the metric will be collected + at device level, that is, it will include values for all the contexts executing + on the GPU. +

+
+
+
2.6.1.1. Metrics for Capability 3.x
+
+

+ Devices with compute capability 3.x implement the metrics shown in the + following table. Note that for some metrics, the "Multi-context" scope + is supported only for specific devices. Such metrics are marked with + "Multi-context*" under the "Scope" column. Refer to the note + at the bottom of the table. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1. Capability 3.x Metrics
Metric NameDescriptionScope
achieved_occupancyRatio of the average active warps per active cycle + to the maximum number of warps supported on a + multiprocessor + Multi-context
alu_fu_utilizationThe utilization level of the multiprocessor function units that execute integer and floating-point arithmetic instructions + on a scale of 0 to 10 + Multi-context
atomic_replay_overheadAverage number of replays due to atomic and reduction bank conflicts for each instruction executedMulti-context
atomic_throughputGlobal memory atomic and reduction throughputMulti-context
atomic_transactionsGlobal memory atomic and reduction transactionsMulti-context
atomic_transactions_per_requestAverage number of global memory atomic and reduction + transactions performed for each atomic and reduction instruction + Multi-context
branch_efficiencyRatio of non-divergent branches to total branches expressed as percentage. + This is available for compute capability 3.0. + + Multi-context
cf_executedNumber of executed control-flow instructionsMulti-context
cf_fu_utilizationThe utilization level of the multiprocessor function units that execute control-flow instructions on a scale of 0 to 10Multi-context
cf_issuedNumber of issued control-flow instructionsMulti-context
dram_read_throughput + Device memory read throughput. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
dram_read_transactions + Device memory read transactions. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
dram_utilizationThe utilization level of the device memory relative to the peak utilization on a scale of 0 to 10Multi-context*
dram_write_throughput + Device memory write throughput. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
dram_write_transactions + Device memory write transactions. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
ecc_throughput + ECC throughput from L2 to DRAM. + This is available for compute capability 3.5 and 3.7. + + Multi-context*
ecc_transactions + Number of ECC transactions between L2 and DRAM. + This is available for compute capability 3.5 and 3.7. + + Multi-context*
eligible_warps_per_cycleAverage number of warps that are eligible to issue per active cycleMulti-context
flop_count_dpNumber of double-precision floating-point operations executed by + non-predicated threads (add, multiply and multiply-accumulate). + Each multiply-accumulate operation contributes 2 to the count. + Multi-context
flop_count_dp_addNumber of double-precision floating-point add operations executed by non-predicated threadsMulti-context
flop_count_dp_fmaNumber of double-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation + contributes 1 to the count. + Multi-context
flop_count_dp_mulNumber of double-precision floating-point multiply operations executed by non-predicated threadsMulti-context
flop_count_spNumber of single-precision floating-point operations executed by + non-predicated threads (add, multiply and multiply-accumulate). + Each multiply-accumulate operation contributes 2 to the count. + The count does not include special operations. + Multi-context
flop_count_sp_addNumber of single-precision floating-point add operations executed by non-predicated threadsMulti-context
flop_count_sp_fmaNumber of single-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation + contributes 1 to the count. + Multi-context
flop_count_sp_mulNumber of single-precision floating-point multiply operations executed by non-predicated threadsMulti-context
flop_count_sp_specialNumber of single-precision floating-point special operations executed by non-predicated threadsMulti-context
flop_dp_efficiencyRatio of achieved to peak double-precision floating-point operationsMulti-context
flop_sp_efficiencyRatio of achieved to peak single-precision floating-point operationsMulti-context
gld_efficiencyRatio of requested global memory load throughput to + required global memory load throughput expressed as percentage + Multi-context*
gld_requested_throughputRequested global memory load throughputMulti-context
gld_throughputGlobal memory load throughputMulti-context*
gld_transactionsNumber of global memory load transactionsMulti-context*
gld_transactions_per_requestAverage number of global memory load transactions performed for each global memory loadMulti-context*
global_cache_replay_overheadAverage number of replays due to global memory + cache misses for each instruction executed + Multi-context
global_replay_overheadAverage number of replays due to global memory + cache misses + Multi-context
gst_efficiencyRatio of requested global memory store throughput + to required global memory store throughput expressed as percentage + Multi-context*
gst_requested_throughputRequested global memory store throughputMulti-context
gst_throughputGlobal memory store throughputMulti-context*
gst_transactionsNumber of global memory store transactionsMulti-context*
gst_transactions_per_requestAverage number of global memory store transactions performed for each global memory storeMulti-context*
inst_bit_convertNumber of bit-conversion instructions executed by non-predicated threadsMulti-context
inst_compute_ld_stNumber of compute load/store instructions executed by non-predicated threadsMulti-context
inst_controlNumber of control-flow instructions executed by non-predicated threads (jump, branch, etc.)Multi-context
inst_executedThe number of instructions executedMulti-context
inst_fp_32Number of single-precision floating-point instructions executed by non-predicated threads (arithmetic, compare, etc.)Multi-context
inst_fp_64Number of double-precision floating-point instructions executed by non-predicated threads (arithmetic, compare, etc.)Multi-context
inst_integerNumber of integer instructions executed by non-predicated threadsMulti-context
inst_inter_thread_communicationNumber of inter-thread communication instructions executed by non-predicated threadsMulti-context
inst_issuedThe number of instructions issuedMulti-context
inst_miscNumber of miscellaneous instructions executed by non-predicated threadsMulti-context
inst_per_warpAverage number of instructions executed by each warpMulti-context
inst_replay_overheadAverage number of replays for each instruction executedMulti-context
ipcInstructions executed per cycleMulti-context
ipc_instanceInstructions executed per cycle for a single multiprocessorMulti-context
issue_slot_utilizationPercentage of issue slots that issued at least one + instruction, averaged across all cycles + Multi-context
issue_slotsThe number of issue slots usedMulti-context
issued_ipcInstructions issued per cycleMulti-context
l1_cache_global_hit_rateHit rate in L1 cache for global loadsMulti-context*
l1_cache_local_hit_rateHit rate in L1 cache for local loads and storesMulti-context*
l1_shared_utilizationThe utilization level of the L1/shared memory relative to peak utilization on a scale of 0 to 10. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_atomic_throughputMemory read throughput seen at L2 cache for + atomic and reduction requests + Multi-context*
l2_atomic_transactionsMemory read transactions seen at L2 cache for atomic and reduction requestsMulti-context*
l2_l1_read_hit_rateHit rate at L2 cache for all read requests from L1 cache. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_l1_read_throughputMemory read throughput seen at L2 cache for read + requests from L1 cache. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_l1_read_transactions + Memory read transactions seen at L2 cache for all read requests from L1 cache. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_l1_write_throughput + Memory write throughput seen at L2 cache for write requests from L1 cache. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_l1_write_transactions + Memory write transactions seen at L2 cache for all write requests from L1 cache. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_read_throughputMemory read throughput seen at L2 cache for all read requestsMulti-context*
l2_read_transactionsMemory read transactions seen at L2 cache for all read requestsMulti-context*
l2_tex_read_transactionsMemory read transactions seen at L2 cache for read requests from the texture cacheMulti-context*
l2_tex_read_hit_rate + Hit rate at L2 cache for all read requests from texture cache. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
l2_tex_read_throughputMemory read throughput seen at L2 cache for read + requests from the texture cache + Multi-context*
l2_utilizationThe utilization level of the L2 cache relative to the peak utilization on a scale of 0 to 10Multi-context*
l2_write_throughputMemory write throughput seen at L2 cache for all write requestsMulti-context*
l2_write_transactionsMemory write transactions seen at L2 cache for all write requestsMulti-context*
ldst_executedNumber of executed local, global, shared and texture memory load and store instructionsMulti-context
ldst_fu_utilizationThe utilization level of the multiprocessor function units that execute global, local and shared memory instructions on a + scale of 0 to 10 + Multi-context
ldst_issuedNumber of issued local, global, shared and texture memory load and store instructionsMulti-context
local_load_throughputLocal memory load throughputMulti-context*
local_load_transactionsNumber of local memory load transactionsMulti-context*
local_load_transactions_per_requestAverage number of local memory load transactions performed for each local memory loadMulti-context*
local_memory_overheadRatio of local memory traffic to total memory + traffic between the L1 and L2 caches expressed as percentage. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
local_replay_overheadAverage number of replays due to local memory + accesses for each instruction executed + Multi-context
local_store_throughputLocal memory store throughputMulti-context*
local_store_transactionsNumber of local memory store transactionsMulti-context*
local_store_transactions_per_requestAverage number of local memory store transactions performed for each local memory storeMulti-context*
nc_cache_global_hit_rateHit rate in non coherent cache for global loadsMulti-context*
nc_gld_efficiencyRatio of requested non coherent global memory load throughput to required non coherent global memory load throughput expressed + as percentage + Multi-context*
nc_gld_requested_throughputRequested throughput for global memory loaded via non-coherent cacheMulti-context
nc_gld_throughputNon coherent global memory load throughputMulti-context*
nc_l2_read_throughputMemory read throughput for non coherent global read requests seen at L2 cacheMulti-context*
nc_l2_read_transactionsMemory read transactions seen at L2 cache for non coherent global read requestsMulti-context*
shared_efficiencyRatio of requested shared memory throughput to required shared memory throughput expressed as percentageMulti-context*
shared_load_throughputShared memory load throughputMulti-context*
shared_load_transactionsNumber of shared memory load transactionsMulti-context*
shared_load_transactions_per_requestAverage number of shared memory load transactions performed for each shared memory loadMulti-context*
shared_replay_overheadAverage number of replays due to shared memory + conflicts for each instruction executed + Multi-context
shared_store_throughputShared memory store throughputMulti-context*
shared_store_transactionsNumber of shared memory store transactionsMulti-context*
shared_store_transactions_per_requestAverage number of shared memory store transactions performed for each shared memory storeMulti-context*
sm_efficiencyThe percentage of time at least one warp is active + on a multiprocessor averaged over all multiprocessors on the GPU + Multi-context*
sm_efficiency_instanceThe percentage of time at least one warp is active + on a specific multiprocessor + Multi-context*
stall_constant_memory_dependency + Percentage of stalls occurring because of immediate constant cache miss. + This is available for compute capability 3.2, 3.5 and 3.7. + + Multi-context
stall_exec_dependencyPercentage of stalls occurring because an input required by the instruction is not yet availableMulti-context
stall_inst_fetchPercentage of stalls occurring because the next assembly instruction has not yet been fetchedMulti-context
stall_memory_dependency + Percentage of stalls occurring because a memory operation cannot be performed due to the required resources not being available + or fully utilized, or because too many requests of a given type are outstanding. + + Multi-context
stall_memory_throttle + Percentage of stalls occurring because of memory throttle. + + Multi-context
stall_not_selected + Percentage of stalls occurring because warp was not selected. + + Multi-context
stall_otherPercentage of stalls occurring due to miscellaneous reasonsMulti-context
stall_pipe_busy + Percentage of stalls occurring because a compute operation cannot be performed because the compute pipeline is busy. + This is available for compute capability 3.2, 3.5 and 3.7. + + Multi-context
stall_syncPercentage of stalls occurring because the warp is blocked at a __syncthreads() callMulti-context
stall_texturePercentage of stalls occurring because the texture sub-system is fully utilized or has too many outstanding requestsMulti-context
sysmem_read_throughputSystem memory read throughput. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
sysmem_read_transactionsSystem memory read transactions. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
sysmem_read_utilizationThe read utilization level of the system memory relative to the peak utilization on a scale of 0 to 10. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context
sysmem_utilizationThe utilization level of the system memory relative to the peak utilization on a scale of 0 to 10. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
sysmem_write_throughputSystem memory write throughput. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
sysmem_write_transactionsSystem memory write transactions. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context*
sysmem_write_utilizationThe write utilization level of the system memory relative to the peak utilization on a scale of 0 to 10. + This is available for compute capability 3.0, 3.5 and 3.7. + + Multi-context
tex_cache_hit_rateTexture cache hit rateMulti-context*
tex_cache_throughputTexture cache throughputMulti-context*
tex_cache_transactionsTexture cache read transactionsMulti-context*
tex_fu_utilizationThe utilization level of the multiprocessor function units that execute texture instructions on a scale of 0 to 10Multi-context
tex_utilizationThe utilization level of the texture cache relative to the peak utilization on a scale of 0 to 10Multi-context*
warp_execution_efficiencyRatio of the average active threads per warp to the + maximum number of threads per warp supported on a + multiprocessor expressed as percentage + Multi-context
warp_nonpred_execution_efficiencyRatio of the average active threads per warp + executing non-predicated instructions to the maximum + number of threads per warp supported on a + multiprocessor expressed as percentage + Multi-context
+
+

* The "Multi-context" scope for this metric is supported only for + devices with compute capability 3.0, 3.5, and 3.7. +

+
+
+
+
2.6.1.2. Metrics for Capability 5.x
+
+

+ Devices with compute capability 5.x implement the metrics shown in the + following table. Note that for some metrics, the "Multi-context" scope + is supported only for specific devices. Such metrics are marked with + "Multi-context*" under the "Scope" column. Refer to the note + at the bottom of the table. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2. Capability 5.x Metrics
Metric NameDescriptionScope
achieved_occupancyRatio of the average active warps per active cycle to the maximum number of + warps supported on a multiprocessor + Multi-context
atomic_transactionsGlobal memory atomic and reduction transactionsMulti-context
atomic_transactions_per_requestAverage number of global memory atomic and reduction transactions performed for + each atomic and reduction instruction + Multi-context
branch_efficiencyRatio of non-divergent branches to total branches expressed as + percentage + Multi-context
cf_executedNumber of executed control-flow instructionsMulti-context
cf_fu_utilizationThe utilization level of the multiprocessor function units that execute + control-flow instructions on a scale of 0 to 10 + Multi-context
cf_issuedNumber of issued control-flow instructionsMulti-context
double_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute + double-precision floating-point instructions on a scale of 0 to 10 + Multi-context
dram_read_bytesTotal bytes read from DRAM to L2 cache. + This is available for compute capability 5.0 and 5.2. + Multi-context*
dram_read_throughputDevice memory read throughput. + This is available for compute capability 5.0 and 5.2. + Multi-context*
dram_read_transactionsDevice memory read transactions. + This is available for compute capability 5.0 and 5.2. + Multi-context*
dram_utilizationThe utilization level of the device memory relative to the peak utilization on a + scale of 0 to 10 + Multi-context*
dram_write_bytesTotal bytes written from L2 cache to DRAM. + This is available for compute capability 5.0 and 5.2. + Multi-context*
dram_write_throughputDevice memory write throughput. + This is available for compute capability 5.0 and 5.2. + Multi-context*
dram_write_transactionsDevice memory write transactions. + This is available for compute capability 5.0 and 5.2. + Multi-context*
ecc_throughputECC throughput from L2 to DRAM. + This is available for compute capability 5.0 and 5.2. + Multi-context*
ecc_transactionsNumber of ECC transactions between L2 and DRAM. + This is available for compute capability 5.0 and 5.2. + Multi-context*
eligible_warps_per_cycleAverage number of warps that are eligible to issue per active cycleMulti-context
flop_count_dpNumber of double-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. + Multi-context
flop_count_dp_addNumber of double-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_dp_fmaNumber of double-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation contributes 1 to + the count. + Multi-context
flop_count_dp_mulNumber of double-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_hpNumber of half-precision floating-point operations executed by + non-predicated threads (add, multiply and multiply-accumulate). + Each multiply-accumulate operation contributes 2 to the count. + This is available for compute capability 5.3. + Multi-context*
flop_count_hp_addNumber of half-precision floating-point add operations executed by non-predicated threads. + This is available for compute capability 5.3. + Multi-context*
flop_count_hp_fmaNumber of half-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation + contributes 1 to the count. + This is available for compute capability 5.3. + Multi-context*
flop_count_hp_mulNumber of half-precision floating-point multiply operations executed by non-predicated threads. + This is available for compute capability 5.3. + Multi-context*
flop_count_spNumber of single-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. The count does not include special operations. + Multi-context
flop_count_sp_addNumber of single-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_sp_fmaNumber of single-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation contributes 1 to + the count. + Multi-context
flop_count_sp_mulNumber of single-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_sp_specialNumber of single-precision floating-point special operations executed by + non-predicated threads. + Multi-context
flop_dp_efficiencyRatio of achieved to peak double-precision floating-point operationsMulti-context
flop_hp_efficiencyRatio of achieved to peak half-precision floating-point operations. + This is available for compute capability 5.3. + Multi-context*
flop_sp_efficiencyRatio of achieved to peak single-precision floating-point operationsMulti-context
gld_efficiencyRatio of requested global memory load throughput to required global memory load + throughput expressed as percentage. + Multi-context*
gld_requested_throughputRequested global memory load throughputMulti-context
gld_throughputGlobal memory load throughputMulti-context*
gld_transactionsNumber of global memory load transactionsMulti-context*
gld_transactions_per_requestAverage number of global memory load transactions performed for each global + memory load. + Multi-context*
global_atomic_requestsTotal number of global atomic (Atom and Atom CAS) requests from + Multiprocessor + Multi-context
global_hit_rateHit rate for global loads in unified l1/tex cache. Metric value may be wrong if + malloc is used in kernel. + Multi-context*
global_load_requestsTotal number of global load requests from MultiprocessorMulti-context
global_reduction_requestsTotal number of global reduction requests from MultiprocessorMulti-context
global_store_requestsTotal number of global store requests from Multiprocessor. This does not include + atomic requests. + Multi-context
gst_efficiencyRatio of requested global memory store throughput to required global memory + store throughput expressed as percentage. + Multi-context*
gst_requested_throughputRequested global memory store throughputMulti-context
gst_throughputGlobal memory store throughputMulti-context*
gst_transactionsNumber of global memory store transactionsMulti-context*
gst_transactions_per_requestAverage number of global memory store transactions performed for each global + memory store + Multi-context*
half_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute 16 bit floating-point instructions and integer instructions + on a scale of 0 to 10. + This is available for compute capability 5.3. + + Multi-context*
inst_bit_convertNumber of bit-conversion instructions executed by non-predicated threadsMulti-context
inst_compute_ld_stNumber of compute load/store instructions executed by non-predicated + threads + Multi-context
inst_controlNumber of control-flow instructions executed by non-predicated threads (jump, + branch, etc.) + Multi-context
inst_executedThe number of instructions executedMulti-context
inst_executed_global_atomicsWarp level instructions for global atom and atom casMulti-context
inst_executed_global_loadsWarp level instructions for global loadsMulti-context
inst_executed_global_reductionsWarp level instructions for global reductionsMulti-context
inst_executed_global_storesWarp level instructions for global storesMulti-context
inst_executed_local_loadsWarp level instructions for local loadsMulti-context
inst_executed_local_storesWarp level instructions for local storesMulti-context
inst_executed_shared_atomicsWarp level shared instructions for atom and atom CASMulti-context
inst_executed_shared_loadsWarp level instructions for shared loadsMulti-context
inst_executed_shared_storesWarp level instructions for shared storesMulti-context
inst_executed_surface_atomicsWarp level instructions for surface atom and atom casMulti-context
inst_executed_surface_loadsWarp level instructions for surface loadsMulti-context
inst_executed_surface_reductionsWarp level instructions for surface reductionsMulti-context
inst_executed_surface_storesWarp level instructions for surface storesMulti-context
inst_executed_tex_opsWarp level instructions for textureMulti-context
inst_fp_16Number of half-precision floating-point instructions executed by non-predicated threads (arithmetic, compare, etc.) + This is available for compute capability 5.3. + + Multi-context*
inst_fp_32Number of single-precision floating-point instructions executed by + non-predicated threads (arithmetic, compare, etc.) + Multi-context
inst_fp_64Number of double-precision floating-point instructions executed by + non-predicated threads (arithmetic, compare, etc.) + Multi-context
inst_integerNumber of integer instructions executed by non-predicated threadsMulti-context
inst_inter_thread_communicationNumber of inter-thread communication instructions executed by non-predicated + threads + Multi-context
inst_issuedThe number of instructions issuedMulti-context
inst_miscNumber of miscellaneous instructions executed by non-predicated threadsMulti-context
inst_per_warpAverage number of instructions executed by each warpMulti-context
inst_replay_overheadAverage number of replays for each instruction executedMulti-context
ipcInstructions executed per cycleMulti-context
issue_slot_utilizationPercentage of issue slots that issued at least one instruction, averaged across + all cycles + Multi-context
issue_slotsThe number of issue slots usedMulti-context
issued_ipcInstructions issued per cycleMulti-context
l2_atomic_throughputMemory read throughput seen at L2 cache for atomic and reduction + requests + Multi-context
l2_atomic_transactionsMemory read transactions seen at L2 cache for atomic and reduction + requests + Multi-context*
l2_global_atomic_store_bytesBytes written to L2 from Unified cache for global atomics (ATOM and ATOM + CAS) + Multi-context*
l2_global_load_bytesBytes read from L2 for misses in Unified Cache for global loadsMulti-context*
l2_global_reduction_bytesBytes written to L2 from Unified cache for global reductionsMulti-context*
l2_local_global_store_bytesBytes written to L2 from Unified Cache for local and global stores. This does + not include global atomics. + Multi-context*
l2_local_load_bytesBytes read from L2 for misses in Unified Cache for local loadsMulti-context*
l2_read_throughputMemory read throughput seen at L2 cache for all read requestsMulti-context*
l2_read_transactionsMemory read transactions seen at L2 cache for all read requestsMulti-context*
l2_surface_atomic_store_bytesBytes transferred between Unified Cache and L2 for surface atomics (ATOM and + ATOM CAS) + Multi-context*
l2_surface_load_bytesBytes read from L2 for misses in Unified Cache for surface loadsMulti-context*
l2_surface_reduction_bytesBytes written to L2 from Unified Cache for surface reductionsMulti-context*
l2_surface_store_bytesBytes written to L2 from Unified Cache for surface stores. This does not include + surface atomics. + Multi-context*
l2_tex_hit_rateHit rate at L2 cache for all requests from texture cacheMulti-context*
l2_tex_read_hit_rateHit rate at L2 cache for all read requests from texture cache. + This is available for compute capability 5.0 and 5.2. + + Multi-context*
l2_tex_read_throughputMemory read throughput seen at L2 cache for read requests from the texture + cache + Multi-context*
l2_tex_read_transactionsMemory read transactions seen at L2 cache for read requests from the texture + cache + Multi-context*
l2_tex_write_hit_rateHit rate at L2 cache for all write requests from texture cache. + This is available for compute capability 5.0 and 5.2. + + Multi-context*
l2_tex_write_throughputMemory write throughput seen at L2 cache for write requests from the texture + cache + Multi-context*
l2_tex_write_transactionsMemory write transactions seen at L2 cache for write requests from the texture + cache + Multi-context*
l2_utilizationThe utilization level of the L2 cache relative to the peak utilization on a + scale of 0 to 10 + Multi-context*
l2_write_throughputMemory write throughput seen at L2 cache for all write requestsMulti-context*
l2_write_transactionsMemory write transactions seen at L2 cache for all write requestsMulti-context*
ldst_executedNumber of executed local, global, shared and texture memory load and store + instructions + Multi-context
ldst_fu_utilizationThe utilization level of the multiprocessor function units that execute shared + load, shared store and constant load instructions on a scale of 0 to 10 + Multi-context
ldst_issuedNumber of issued local, global, shared and texture memory load and store + instructions + Multi-context
local_hit_rateHit rate for local loads and storesMulti-context*
local_load_requestsTotal number of local load requests from MultiprocessorMulti-context*
local_load_throughputLocal memory load throughputMulti-context*
local_load_transactionsNumber of local memory load transactionsMulti-context*
local_load_transactions_per_requestAverage number of local memory load transactions performed for each local memory + load + Multi-context*
local_memory_overheadRatio of local memory traffic to total memory traffic between the L1 and L2 + caches expressed as percentage + Multi-context*
local_store_requestsTotal number of local store requests from MultiprocessorMulti-context*
local_store_throughputLocal memory store throughputMulti-context*
local_store_transactionsNumber of local memory store transactionsMulti-context*
local_store_transactions_per_requestAverage number of local memory store transactions performed for each local + memory store + Multi-context*
pcie_total_data_receivedTotal data bytes received through PCIeDevice
pcie_total_data_transmittedTotal data bytes transmitted through PCIeDevice
shared_efficiencyRatio of requested shared memory throughput to required shared memory throughput + expressed as percentage + Multi-context*
shared_load_throughputShared memory load throughputMulti-context*
shared_load_transactionsNumber of shared memory load transactionsMulti-context*
shared_load_transactions_per_requestAverage number of shared memory load transactions performed for each shared + memory load + Multi-context*
shared_store_throughputShared memory store throughputMulti-context*
shared_store_transactionsNumber of shared memory store transactionsMulti-context*
shared_store_transactions_per_requestAverage number of shared memory store transactions performed for each shared + memory store + Multi-context*
shared_utilizationThe utilization level of the shared memory relative to peak utilization on a + scale of 0 to 10 + Multi-context*
single_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute + single-precision floating-point instructions and integer instructions on a scale of 0 + to 10 + Multi-context
sm_efficiencyThe percentage of time at least one warp is active on a specific + multiprocessor + Multi-context*
special_fu_utilizationThe utilization level of the multiprocessor function units that execute sin, + cos, ex2, popc, flo, and similar instructions on a scale of 0 to 10 + Multi-context
stall_constant_memory_dependencyPercentage of stalls occurring because of immediate constant cache missMulti-context
stall_exec_dependencyPercentage of stalls occurring because an input required by the instruction is + not yet available + Multi-context
stall_inst_fetchPercentage of stalls occurring because the next assembly instruction has not yet + been fetched + Multi-context
stall_memory_dependencyPercentage of stalls occurring because a memory operation cannot be performed + due to the required resources not being available or fully utilized, or because too + many requests of a given type are outstanding + Multi-context
stall_memory_throttlePercentage of stalls occurring because of memory throttleMulti-context
stall_not_selectedPercentage of stalls occurring because warp was not selectedMulti-context
stall_otherPercentage of stalls occurring due to miscellaneous reasonsMulti-context
stall_pipe_busyPercentage of stalls occurring because a compute operation cannot be performed + because the compute pipeline is busy + Multi-context
stall_syncPercentage of stalls occurring because the warp is blocked at a __syncthreads() + call + Multi-context
stall_texturePercentage of stalls occurring because the texture sub-system is fully utilized + or has too many outstanding requests + Multi-context
surface_atomic_requestsTotal number of surface atomic (Atom and Atom CAS) requests from + Multiprocessor + Multi-context
surface_load_requestsTotal number of surface load requests from MultiprocessorMulti-context
surface_reduction_requestsTotal number of surface reduction requests from MultiprocessorMulti-context
surface_store_requestsTotal number of surface store requests from MultiprocessorMulti-context
sysmem_read_bytesNumber of bytes read from system memoryMulti-context*
sysmem_read_throughputSystem memory read throughputMulti-context*
sysmem_read_transactionsNumber of system memory read transactionsMulti-context*
sysmem_read_utilizationThe read utilization level of the system memory relative to the peak utilization + on a scale of 0 to 10. + This is available for compute capability 5.0 and 5.2. + Multi-context
sysmem_utilizationThe utilization level of the system memory relative to the peak utilization on a + scale of 0 to 10. + This is available for compute capability 5.0 and 5.2. + Multi-context*
sysmem_write_bytesNumber of bytes written to system memoryMulti-context*
sysmem_write_throughputSystem memory write throughputMulti-context*
sysmem_write_transactionsNumber of system memory write transactionsMulti-context*
sysmem_write_utilizationThe write utilization level of the system memory relative to the peak + utilization on a scale of 0 to 10. + This is available for compute capability 5.0 and 5.2. + Multi-context*
tex_cache_hit_rateUnified cache hit rateMulti-context*
tex_cache_throughputUnified cache throughputMulti-context*
tex_cache_transactionsUnified cache read transactionsMulti-context*
tex_fu_utilizationThe utilization level of the multiprocessor function units that execute global, + local and texture memory instructions on a scale of 0 to 10 + Multi-context
tex_utilizationThe utilization level of the unified cache relative to the peak utilization on a + scale of 0 to 10 + Multi-context*
texture_load_requestsTotal number of texture load requests from MultiprocessorMulti-context
warp_execution_efficiencyRatio of the average active threads per warp to the maximum number of threads + per warp supported on a multiprocessor + Multi-context
warp_nonpred_execution_efficiencyRatio of the average active threads per warp executing non-predicated + instructions to the maximum number of threads per warp supported on a + multiprocessor + Multi-context
+
+

* The "Multi-context" scope for this metric is supported only for + devices with compute capability 5.0 and 5.2.

+
+
+
+
2.6.1.3. Metrics for Capability 6.x
+
+

+ Devices with compute capability 6.x implement the metrics shown in the + following table. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3. Capability 6.x Metrics
Metric NameDescriptionScope
achieved_occupancyRatio of the average active warps per active cycle to the maximum number of + warps supported on a multiprocessor + Multi-context
atomic_transactionsGlobal memory atomic and reduction transactionsMulti-context
atomic_transactions_per_requestAverage number of global memory atomic and reduction transactions performed for + each atomic and reduction instruction + Multi-context
branch_efficiencyRatio of non-divergent branches to total branches expressed as + percentage + Multi-context
cf_executedNumber of executed control-flow instructionsMulti-context
cf_fu_utilizationThe utilization level of the multiprocessor function units that execute + control-flow instructions on a scale of 0 to 10 + Multi-context
cf_issuedNumber of issued control-flow instructionsMulti-context
double_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute + double-precision floating-point instructions on a scale of 0 to 10 + Multi-context
dram_read_bytesTotal bytes read from DRAM to L2 cacheMulti-context
dram_read_throughputDevice memory read throughput. + This is available for compute capability 6.0 and 6.1. + Multi-context
dram_read_transactionsDevice memory read transactions. + This is available for compute capability 6.0 and 6.1. + Multi-context
dram_utilizationThe utilization level of the device memory relative to the peak utilization on a + scale of 0 to 10 + Multi-context
dram_write_bytesTotal bytes written from L2 cache to DRAMMulti-context
dram_write_throughputDevice memory write throughput. + This is available for compute capability 6.0 and 6.1. + Multi-context
dram_write_transactionsDevice memory write transactions. + This is available for compute capability 6.0 and 6.1. + Multi-context
ecc_throughputECC throughput from L2 to DRAM. + This is available for compute capability 6.1. + Multi-context
ecc_transactionsNumber of ECC transactions between L2 and DRAM. + This is available for compute capability 6.1. + Multi-context
eligible_warps_per_cycleAverage number of warps that are eligible to issue per active cycleMulti-context
flop_count_dpNumber of double-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. + Multi-context
flop_count_dp_addNumber of double-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_dp_fmaNumber of double-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation contributes 1 to + the count. + Multi-context
flop_count_dp_mulNumber of double-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_hpNumber of half-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. + Multi-context
flop_count_hp_addNumber of half-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_hp_fmaNumber of half-precision floating-point multiply-accumulate operations executed + by non-predicated threads. Each multiply-accumulate operation contributes 1 to the + count. + Multi-context
flop_count_hp_mulNumber of half-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_spNumber of single-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. The count does not include special operations. + Multi-context
flop_count_sp_addNumber of single-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_sp_fmaNumber of single-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation contributes 1 to + the count. + Multi-context
flop_count_sp_mulNumber of single-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_sp_specialNumber of single-precision floating-point special operations executed by + non-predicated threads. + Multi-context
flop_dp_efficiencyRatio of achieved to peak double-precision floating-point operationsMulti-context
flop_hp_efficiencyRatio of achieved to peak half-precision floating-point operationsMulti-context
flop_sp_efficiencyRatio of achieved to peak single-precision floating-point operationsMulti-context
gld_efficiencyRatio of requested global memory load throughput to required global memory load + throughput expressed as percentage. + Multi-context
gld_requested_throughputRequested global memory load throughputMulti-context
gld_throughputGlobal memory load throughputMulti-context
gld_transactionsNumber of global memory load transactionsMulti-context
gld_transactions_per_requestAverage number of global memory load transactions performed for each global + memory load. + Multi-context
global_atomic_requestsTotal number of global atomic (Atom and Atom CAS) requests from + Multiprocessor + Multi-context
global_hit_rateHit rate for global loads in unified l1/tex cache. Metric value may be wrong if + malloc is used in kernel. + Multi-context
global_load_requestsTotal number of global load requests from MultiprocessorMulti-context
global_reduction_requestsTotal number of global reduction requests from MultiprocessorMulti-context
global_store_requestsTotal number of global store requests from Multiprocessor. This does not include + atomic requests. + Multi-context
gst_efficiencyRatio of requested global memory store throughput to required global memory + store throughput expressed as percentage. + Multi-context
gst_requested_throughputRequested global memory store throughputMulti-context
gst_throughputGlobal memory store throughputMulti-context
gst_transactionsNumber of global memory store transactionsMulti-context
gst_transactions_per_requestAverage number of global memory store transactions performed for each global + memory store + Multi-context
half_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute 16 bit + floating-point instructions on a scale of 0 to 10 + Multi-context
inst_bit_convertNumber of bit-conversion instructions executed by non-predicated threadsMulti-context
inst_compute_ld_stNumber of compute load/store instructions executed by non-predicated + threads + Multi-context
inst_controlNumber of control-flow instructions executed by non-predicated threads (jump, + branch, etc.) + Multi-context
inst_executedThe number of instructions executedMulti-context
inst_executed_global_atomicsWarp level instructions for global atom and atom casMulti-context
inst_executed_global_loadsWarp level instructions for global loadsMulti-context
inst_executed_global_reductionsWarp level instructions for global reductionsMulti-context
inst_executed_global_storesWarp level instructions for global storesMulti-context
inst_executed_local_loadsWarp level instructions for local loadsMulti-context
inst_executed_local_storesWarp level instructions for local storesMulti-context
inst_executed_shared_atomicsWarp level shared instructions for atom and atom CASMulti-context
inst_executed_shared_loadsWarp level instructions for shared loadsMulti-context
inst_executed_shared_storesWarp level instructions for shared storesMulti-context
inst_executed_surface_atomicsWarp level instructions for surface atom and atom casMulti-context
inst_executed_surface_loadsWarp level instructions for surface loadsMulti-context
inst_executed_surface_reductionsWarp level instructions for surface reductionsMulti-context
inst_executed_surface_storesWarp level instructions for surface storesMulti-context
inst_executed_tex_opsWarp level instructions for textureMulti-context
inst_fp_16Number of half-precision floating-point instructions executed by non-predicated + threads (arithmetic, compare, etc.) + Multi-context
inst_fp_32Number of single-precision floating-point instructions executed by + non-predicated threads (arithmetic, compare, etc.) + Multi-context
inst_fp_64Number of double-precision floating-point instructions executed by + non-predicated threads (arithmetic, compare, etc.) + Multi-context
inst_integerNumber of integer instructions executed by non-predicated threadsMulti-context
inst_inter_thread_communicationNumber of inter-thread communication instructions executed by non-predicated + threads + Multi-context
inst_issuedThe number of instructions issuedMulti-context
inst_miscNumber of miscellaneous instructions executed by non-predicated threadsMulti-context
inst_per_warpAverage number of instructions executed by each warpMulti-context
inst_replay_overheadAverage number of replays for each instruction executedMulti-context
ipcInstructions executed per cycleMulti-context
issue_slot_utilizationPercentage of issue slots that issued at least one instruction, averaged across + all cycles + Multi-context
issue_slotsThe number of issue slots usedMulti-context
issued_ipcInstructions issued per cycleMulti-context
l2_atomic_throughputMemory read throughput seen at L2 cache for atomic and reduction + requests + Multi-context
l2_atomic_transactionsMemory read transactions seen at L2 cache for atomic and reduction + requests + Multi-context
l2_global_atomic_store_bytesBytes written to L2 from Unified cache for global atomics (ATOM and ATOM + CAS) + Multi-context
l2_global_load_bytesBytes read from L2 for misses in Unified Cache for global loadsMulti-context
l2_global_reduction_bytesBytes written to L2 from Unified cache for global reductionsMulti-context
l2_local_global_store_bytesBytes written to L2 from Unified Cache for local and global stores. This does + not include global atomics. + Multi-context
l2_local_load_bytesBytes read from L2 for misses in Unified Cache for local loadsMulti-context
l2_read_throughputMemory read throughput seen at L2 cache for all read requestsMulti-context
l2_read_transactionsMemory read transactions seen at L2 cache for all read requestsMulti-context
l2_surface_atomic_store_bytesBytes transferred between Unified Cache and L2 for surface atomics (ATOM and + ATOM CAS) + Multi-context
l2_surface_load_bytesBytes read from L2 for misses in Unified Cache for surface loadsMulti-context
l2_surface_reduction_bytesBytes written to L2 from Unified Cache for surface reductionsMulti-context
l2_surface_store_bytesBytes written to L2 from Unified Cache for surface stores. This does not include + surface atomics. + Multi-context
l2_tex_hit_rateHit rate at L2 cache for all requests from texture cacheMulti-context
l2_tex_read_hit_rateHit rate at L2 cache for all read requests from texture cache. + This is available for compute capability 6.0 and 6.1. + Multi-context
l2_tex_read_throughputMemory read throughput seen at L2 cache for read requests from the texture + cache + Multi-context
l2_tex_read_transactionsMemory read transactions seen at L2 cache for read requests from the texture + cache + Multi-context
l2_tex_write_hit_rateHit rate at L2 cache for all write requests from texture cache. + This is available for compute capability 6.0 and 6.1. + Multi-context
l2_tex_write_throughputMemory write throughput seen at L2 cache for write requests from the texture + cache + Multi-context
l2_tex_write_transactionsMemory write transactions seen at L2 cache for write requests from the texture + cache + Multi-context
l2_utilizationThe utilization level of the L2 cache relative to the peak utilization on a + scale of 0 to 10 + Multi-context
l2_write_throughputMemory write throughput seen at L2 cache for all write requestsMulti-context
l2_write_transactionsMemory write transactions seen at L2 cache for all write requestsMulti-context
ldst_executedNumber of executed local, global, shared and texture memory load and store + instructions + Multi-context
ldst_fu_utilizationThe utilization level of the multiprocessor function units that execute shared + load, shared store and constant load instructions on a scale of 0 to 10 + Multi-context
ldst_issuedNumber of issued local, global, shared and texture memory load and store + instructions + Multi-context
local_hit_rateHit rate for local loads and storesMulti-context
local_load_requestsTotal number of local load requests from MultiprocessorMulti-context
local_load_throughputLocal memory load throughputMulti-context
local_load_transactionsNumber of local memory load transactionsMulti-context
local_load_transactions_per_requestAverage number of local memory load transactions performed for each local memory + load + Multi-context
local_memory_overheadRatio of local memory traffic to total memory traffic between the L1 and L2 + caches expressed as percentage + Multi-context
local_store_requestsTotal number of local store requests from MultiprocessorMulti-context
local_store_throughputLocal memory store throughputMulti-context
local_store_transactionsNumber of local memory store transactionsMulti-context
local_store_transactions_per_requestAverage number of local memory store transactions performed for each local + memory store + Multi-context
nvlink_overhead_data_receivedRatio of overhead data to the total data, received through NVLink. + This is available for compute capability 6.0. + Device
nvlink_overhead_data_transmittedRatio of overhead data to the total data, transmitted through NVLink. + This is available for compute capability 6.0. + Device
nvlink_receive_throughputNumber of bytes received per second through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_total_data_receivedTotal data bytes received through NVLinks including headers. + This is available for compute capability 6.0. + Device
nvlink_total_data_transmittedTotal data bytes transmitted through NVLinks including headers. + This is available for compute capability 6.0. + Device
nvlink_total_nratom_data_transmittedTotal non-reduction atomic data bytes transmitted through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_total_ratom_data_transmittedTotal reduction atomic data bytes transmitted through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_total_response_data_receivedTotal response data bytes received through NVLink, response data includes + data for read requests and result of non-reduction atomic requests. + This is available for compute capability 6.0. + Device
nvlink_total_write_data_transmittedTotal write data bytes transmitted through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_transmit_throughputNumber of bytes transmitted per second through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_user_data_receivedUser data bytes received through NVLinks, doesn't include headers. + This is available for compute capability 6.0. + Device
nvlink_user_data_transmittedUser data bytes transmitted through NVLinks, doesn't include headers. + This is available for compute capability 6.0. + Device
nvlink_user_nratom_data_transmittedTotal non-reduction atomic user data bytes transmitted through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_user_ratom_data_transmittedTotal reduction atomic user data bytes transmitted through NVLinks. + This is available for compute capability 6.0. + Device
nvlink_user_response_data_received Total user response data bytes received through NVLink, response data includes + data for read requests and result of non-reduction atomic requests. + This is available for compute capability 6.0. + Device
nvlink_user_write_data_transmittedUser write data bytes transmitted through NVLinks. + This is available for compute capability 6.0. + Device
pcie_total_data_receivedTotal data bytes received through PCIeDevice
pcie_total_data_transmittedTotal data bytes transmitted through PCIeDevice
shared_efficiencyRatio of requested shared memory throughput to required shared memory throughput + expressed as percentage + Multi-context
shared_load_throughputShared memory load throughputMulti-context
shared_load_transactionsNumber of shared memory load transactionsMulti-context
shared_load_transactions_per_requestAverage number of shared memory load transactions performed for each shared + memory load + Multi-context
shared_store_throughputShared memory store throughputMulti-context
shared_store_transactionsNumber of shared memory store transactionsMulti-context
shared_store_transactions_per_requestAverage number of shared memory store transactions performed for each shared + memory store + Multi-context
shared_utilizationThe utilization level of the shared memory relative to peak utilization on a + scale of 0 to 10 + Multi-context
single_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute + single-precision floating-point instructions and integer instructions on a scale of 0 + to 10 + Multi-context
sm_efficiencyThe percentage of time at least one warp is active on a specific + multiprocessor + Multi-context
special_fu_utilizationThe utilization level of the multiprocessor function units that execute sin, + cos, ex2, popc, flo, and similar instructions on a scale of 0 to 10 + Multi-context
stall_constant_memory_dependencyPercentage of stalls occurring because of immediate constant cache missMulti-context
stall_exec_dependencyPercentage of stalls occurring because an input required by the instruction is + not yet available + Multi-context
stall_inst_fetchPercentage of stalls occurring because the next assembly instruction has not yet + been fetched + Multi-context
stall_memory_dependencyPercentage of stalls occurring because a memory operation cannot be performed + due to the required resources not being available or fully utilized, or because too + many requests of a given type are outstanding + Multi-context
stall_memory_throttlePercentage of stalls occurring because of memory throttleMulti-context
stall_not_selectedPercentage of stalls occurring because warp was not selectedMulti-context
stall_otherPercentage of stalls occurring due to miscellaneous reasonsMulti-context
stall_pipe_busyPercentage of stalls occurring because a compute operation cannot be performed + because the compute pipeline is busy + Multi-context
stall_syncPercentage of stalls occurring because the warp is blocked at a __syncthreads() + call + Multi-context
stall_texturePercentage of stalls occurring because the texture sub-system is fully utilized + or has too many outstanding requests + Multi-context
surface_atomic_requestsTotal number of surface atomic(Atom and Atom CAS) requests from + Multiprocessor + Multi-context
surface_load_requestsTotal number of surface load requests from MultiprocessorMulti-context
surface_reduction_requestsTotal number of surface reduction requests from MultiprocessorMulti-context
surface_store_requestsTotal number of surface store requests from MultiprocessorMulti-context
sysmem_read_bytesNumber of bytes read from system memoryMulti-context
sysmem_read_throughputSystem memory read throughputMulti-context
sysmem_read_transactionsNumber of system memory read transactionsMulti-context
sysmem_read_utilizationThe read utilization level of the system memory relative to the peak utilization + on a scale of 0 to 10. This is available for compute capability 6.0 and 6.1. + Multi-context
sysmem_utilizationThe utilization level of the system memory relative to the peak utilization on a + scale of 0 to 10. This is available for compute capability 6.0 and 6.1. + Multi-context
sysmem_write_bytesNumber of bytes written to system memoryMulti-context
sysmem_write_throughputSystem memory write throughputMulti-context
sysmem_write_transactionsNumber of system memory write transactionsMulti-context
sysmem_write_utilizationThe write utilization level of the system memory relative to the peak + utilization on a scale of 0 to 10. This is available for compute capability 6.0 and 6.1. + + Multi-context
tex_cache_hit_rateUnified cache hit rateMulti-context
tex_cache_throughputUnified cache throughputMulti-context
tex_cache_transactionsUnified cache read transactionsMulti-context
tex_fu_utilizationThe utilization level of the multiprocessor function units that execute global, + local and texture memory instructions on a scale of 0 to 10 + Multi-context
tex_utilizationThe utilization level of the unified cache relative to the peak utilization on a + scale of 0 to 10 + Multi-context
texture_load_requestsTotal number of texture Load requests from MultiprocessorMulti-context
unique_warps_launchedNumber of warps launched. Value is unaffected by compute preemption.Multi-context
warp_execution_efficiencyRatio of the average active threads per warp to the maximum number of threads + per warp supported on a multiprocessor + Multi-context
warp_nonpred_execution_efficiencyRatio of the average active threads per warp executing non-predicated + instructions to the maximum number of threads per warp supported on a + multiprocessor + Multi-context
+
+
+
+
+
2.6.1.4. Metrics for Capability 7.0
+
+

+ Devices with compute capability 7.x (7.0 and 7.2) implement the metrics shown in the + following table. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 4. Capability 7.x (7.0 and 7.2) Metrics
Metric NameDescriptionScope
achieved_occupancyRatio of the average active warps per active cycle to the maximum number of + warps supported on a multiprocessor + Multi-context
atomic_transactionsGlobal memory atomic and reduction transactionsMulti-context
atomic_transactions_per_requestAverage number of global memory atomic and reduction transactions performed for + each atomic and reduction instruction + Multi-context
branch_efficiencyRatio of branch instruction to sum of branch and divergent branch + instruction + Multi-context
cf_executedNumber of executed control-flow instructionsMulti-context
cf_fu_utilizationThe utilization level of the multiprocessor function units that execute + control-flow instructions on a scale of 0 to 10 + Multi-context
cf_issuedNumber of issued control-flow instructionsMulti-context
double_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute + double-precision floating-point instructions on a scale of 0 to 10 + Multi-context
dram_read_bytesTotal bytes read from DRAM to L2 cacheMulti-context
dram_read_throughputDevice memory read throughputMulti-context
dram_read_transactionsDevice memory read transactionsMulti-context
dram_utilizationThe utilization level of the device memory relative to the peak utilization on a + scale of 0 to 10 + Multi-context
dram_write_bytesTotal bytes written from L2 cache to DRAMMulti-context
dram_write_throughputDevice memory write throughputMulti-context
dram_write_transactionsDevice memory write transactionsMulti-context
eligible_warps_per_cycleAverage number of warps that are eligible to issue per active cycleMulti-context
flop_count_dpNumber of double-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. + Multi-context
flop_count_dp_addNumber of double-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_dp_fmaNumber of double-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation contributes 1 to + the count. + Multi-context
flop_count_dp_mulNumber of double-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_hpNumber of half-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate contributes + 2 or 4 to the count based on the number of inputs. + Multi-context
flop_count_hp_addNumber of half-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_hp_fmaNumber of half-precision floating-point multiply-accumulate operations executed + by non-predicated threads. Each multiply-accumulate contributes 2 or 4 to the count + based on the number of inputs. + Multi-context
flop_count_hp_mulNumber of half-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_spNumber of single-precision floating-point operations executed by non-predicated + threads (add, multiply, and multiply-accumulate). Each multiply-accumulate operation + contributes 2 to the count. The count does not include special operations. + Multi-context
flop_count_sp_addNumber of single-precision floating-point add operations executed by + non-predicated threads. + Multi-context
flop_count_sp_fmaNumber of single-precision floating-point multiply-accumulate operations + executed by non-predicated threads. Each multiply-accumulate operation contributes 1 to + the count. + Multi-context
flop_count_sp_mulNumber of single-precision floating-point multiply operations executed by + non-predicated threads. + Multi-context
flop_count_sp_specialNumber of single-precision floating-point special operations executed by + non-predicated threads. + Multi-context
flop_dp_efficiencyRatio of achieved to peak double-precision floating-point operationsMulti-context
flop_hp_efficiencyRatio of achieved to peak half-precision floating-point operationsMulti-context
flop_sp_efficiencyRatio of achieved to peak single-precision floating-point operationsMulti-context
gld_efficiencyRatio of requested global memory load throughput to required global memory load + throughput expressed as percentage. + Multi-context
gld_requested_throughputRequested global memory load throughputMulti-context
gld_throughputGlobal memory load throughputMulti-context
gld_transactionsNumber of global memory load transactionsMulti-context
gld_transactions_per_requestAverage number of global memory load transactions performed for each global + memory load. + Multi-context
global_atomic_requestsTotal number of global atomic(Atom and Atom CAS) requests from + Multiprocessor + Multi-context
global_hit_rateHit rate for global load and store in unified l1/tex cacheMulti-context
global_load_requestsTotal number of global load requests from MultiprocessorMulti-context
global_reduction_requestsTotal number of global reduction requests from MultiprocessorMulti-context
global_store_requestsTotal number of global store requests from Multiprocessor. This does not include + atomic requests. + Multi-context
gst_efficiencyRatio of requested global memory store throughput to required global memory + store throughput expressed as percentage. + Multi-context
gst_requested_throughputRequested global memory store throughputMulti-context
gst_throughputGlobal memory store throughputMulti-context
gst_transactionsNumber of global memory store transactionsMulti-context
gst_transactions_per_requestAverage number of global memory store transactions performed for each global + memory store + Multi-context
half_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute 16 bit + floating-point instructions on a scale of 0 to 10. Note that this doesn't specify the + utilization level of tensor core unit + Multi-context
inst_bit_convertNumber of bit-conversion instructions executed by non-predicated threadsMulti-context
inst_compute_ld_stNumber of compute load/store instructions executed by non-predicated + threads + Multi-context
inst_controlNumber of control-flow instructions executed by non-predicated threads (jump, + branch, etc.) + Multi-context
inst_executedThe number of instructions executedMulti-context
inst_executed_global_atomicsWarp level instructions for global atom and atom casMulti-context
inst_executed_global_loadsWarp level instructions for global loadsMulti-context
inst_executed_global_reductionsWarp level instructions for global reductionsMulti-context
inst_executed_global_storesWarp level instructions for global storesMulti-context
inst_executed_local_loadsWarp level instructions for local loadsMulti-context
inst_executed_local_storesWarp level instructions for local storesMulti-context
inst_executed_shared_atomicsWarp level shared instructions for atom and atom CASMulti-context
inst_executed_shared_loadsWarp level instructions for shared loadsMulti-context
inst_executed_shared_storesWarp level instructions for shared storesMulti-context
inst_executed_surface_atomicsWarp level instructions for surface atom and atom casMulti-context
inst_executed_surface_loadsWarp level instructions for surface loadsMulti-context
inst_executed_surface_reductionsWarp level instructions for surface reductionsMulti-context
inst_executed_surface_storesWarp level instructions for surface storesMulti-context
inst_executed_tex_opsWarp level instructions for textureMulti-context
inst_fp_16Number of half-precision floating-point instructions executed by non-predicated + threads (arithmetic, compare, etc.) + Multi-context
inst_fp_32Number of single-precision floating-point instructions executed by + non-predicated threads (arithmetic, compare, etc.) + Multi-context
inst_fp_64Number of double-precision floating-point instructions executed by + non-predicated threads (arithmetic, compare, etc.) + Multi-context
inst_integerNumber of integer instructions executed by non-predicated threadsMulti-context
inst_inter_thread_communicationNumber of inter-thread communication instructions executed by non-predicated + threads + Multi-context
inst_issuedThe number of instructions issuedMulti-context
inst_miscNumber of miscellaneous instructions executed by non-predicated threadsMulti-context
inst_per_warpAverage number of instructions executed by each warpMulti-context
inst_replay_overheadAverage number of replays for each instruction executedMulti-context
ipcInstructions executed per cycleMulti-context
issue_slot_utilizationPercentage of issue slots that issued at least one instruction, averaged across + all cycles + Multi-context
issue_slotsThe number of issue slots usedMulti-context
issued_ipcInstructions issued per cycleMulti-context
l2_atomic_throughputMemory read throughput seen at L2 cache for atomic and reduction + requests + Multi-context
l2_atomic_transactionsMemory read transactions seen at L2 cache for atomic and reduction + requests + Multi-context
l2_global_atomic_store_bytesBytes written to L2 from L1 for global atomics (ATOM and ATOM CAS)Multi-context
l2_global_load_bytesBytes read from L2 for misses in L1 for global loadsMulti-context
l2_local_global_store_bytesBytes written to L2 from L1 for local and global stores. This does not include + global atomics. + Multi-context
l2_local_load_bytesBytes read from L2 for misses in L1 for local loadsMulti-context
l2_read_throughputMemory read throughput seen at L2 cache for all read requestsMulti-context
l2_read_transactionsMemory read transactions seen at L2 cache for all read requestsMulti-context
l2_surface_load_bytesBytes read from L2 for misses in L1 for surface loadsMulti-context
l2_surface_store_bytesBytes written to L2 from L1 for surface storesMulti-context
l2_tex_hit_rateHit rate at L2 cache for all requests from texture cacheMulti-context
l2_tex_read_hit_rateHit rate at L2 cache for all read requests from texture cacheMulti-context
l2_tex_read_throughputMemory read throughput seen at L2 cache for read requests from the texture + cache + Multi-context
l2_tex_read_transactionsMemory read transactions seen at L2 cache for read requests from the texture + cache + Multi-context
l2_tex_write_hit_rateHit Rate at L2 cache for all write requests from texture cacheMulti-context
l2_tex_write_throughputMemory write throughput seen at L2 cache for write requests from the texture + cache + Multi-context
l2_tex_write_transactionsMemory write transactions seen at L2 cache for write requests from the texture + cache + Multi-context
l2_utilizationThe utilization level of the L2 cache relative to the peak utilization on a + scale of 0 to 10 + Multi-context
l2_write_throughputMemory write throughput seen at L2 cache for all write requestsMulti-context
l2_write_transactionsMemory write transactions seen at L2 cache for all write requestsMulti-context
ldst_executedNumber of executed local, global, shared and texture memory load and store + instructions + Multi-context
ldst_fu_utilizationThe utilization level of the multiprocessor function units that execute shared + load, shared store and constant load instructions on a scale of 0 to 10 + Multi-context
ldst_issuedNumber of issued local, global, shared and texture memory load and store + instructions + Multi-context
local_hit_rateHit rate for local loads and storesMulti-context
local_load_requestsTotal number of local load requests from MultiprocessorMulti-context
local_load_throughputLocal memory load throughputMulti-context
local_load_transactionsNumber of local memory load transactionsMulti-context
local_load_transactions_per_requestAverage number of local memory load transactions performed for each local memory + load + Multi-context
local_memory_overheadRatio of local memory traffic to total memory traffic between the L1 and L2 + caches expressed as percentage + Multi-context
local_store_requestsTotal number of local store requests from MultiprocessorMulti-context
local_store_throughputLocal memory store throughputMulti-context
local_store_transactionsNumber of local memory store transactionsMulti-context
local_store_transactions_per_requestAverage number of local memory store transactions performed for each local + memory store + Multi-context
nvlink_overhead_data_receivedRatio of overhead data to the total data, received through NVLink.Device
nvlink_overhead_data_transmittedRatio of overhead data to the total data, transmitted through NVLink.Device
nvlink_receive_throughputNumber of bytes received per second through NVLinks.Device
nvlink_total_data_receivedTotal data bytes received through NVLinks including headers.Device
nvlink_total_data_transmittedTotal data bytes transmitted through NVLinks including headers.Device
nvlink_total_nratom_data_transmittedTotal non-reduction atomic data bytes transmitted through NVLinks.Device
nvlink_total_ratom_data_transmittedTotal reduction atomic data bytes transmitted through NVLinks.Device
nvlink_total_response_data_receivedTotal response data bytes received through NVLink, response data includes + data for read requests and result of non-reduction atomic requests. + Device
nvlink_total_write_data_transmittedTotal write data bytes transmitted through NVLinks.Device
nvlink_transmit_throughputNumber of bytes transmitted per second through NVLinks.Device
nvlink_user_data_receivedUser data bytes received through NVLinks, doesn't include headers.Device
nvlink_user_data_transmittedUser data bytes transmitted through NVLinks, doesn't include headers.Device
nvlink_user_nratom_data_transmittedTotal non-reduction atomic user data bytes transmitted through NVLinks.Device
nvlink_user_ratom_data_transmittedTotal reduction atomic user data bytes transmitted through NVLinks.Device
nvlink_user_response_data_received Total user response data bytes received through NVLink, response data includes + data for read requests and result of non-reduction atomic requests. + Device
nvlink_user_write_data_transmittedUser write data bytes transmitted through NVLinks.Device
pcie_total_data_receivedTotal data bytes received through PCIeDevice
pcie_total_data_transmittedTotal data bytes transmitted through PCIeDevice
shared_efficiencyRatio of requested shared memory throughput to required shared memory throughput + expressed as percentage + Multi-context
shared_load_throughputShared memory load throughputMulti-context
shared_load_transactionsNumber of shared memory load transactionsMulti-context
shared_load_transactions_per_requestAverage number of shared memory load transactions performed for each shared + memory load + Multi-context
shared_store_throughputShared memory store throughputMulti-context
shared_store_transactionsNumber of shared memory store transactionsMulti-context
shared_store_transactions_per_requestAverage number of shared memory store transactions performed for each shared + memory store + Multi-context
shared_utilizationThe utilization level of the shared memory relative to peak utilization on a + scale of 0 to 10 + Multi-context
single_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute + single-precision floating-point instructions on a scale of 0 to 10 + Multi-context
sm_efficiencyThe percentage of time at least one warp is active on a specific + multiprocessor + Multi-context
special_fu_utilizationThe utilization level of the multiprocessor function units that execute sin, + cos, ex2, popc, flo, and similar instructions on a scale of 0 to 10 + Multi-context
stall_constant_memory_dependencyPercentage of stalls occurring because of immediate constant cache missMulti-context
stall_exec_dependencyPercentage of stalls occurring because an input required by the instruction is + not yet available + Multi-context
stall_inst_fetchPercentage of stalls occurring because the next assembly instruction has not yet + been fetched + Multi-context
stall_memory_dependencyPercentage of stalls occurring because a memory operation cannot be performed + due to the required resources not being available or fully utilized, or because too + many requests of a given type are outstanding + Multi-context
stall_memory_throttlePercentage of stalls occurring because of memory throttleMulti-context
stall_not_selectedPercentage of stalls occurring because warp was not selectedMulti-context
stall_otherPercentage of stalls occurring due to miscellaneous reasonsMulti-context
stall_pipe_busyPercentage of stalls occurring because a compute operation cannot be performed + because the compute pipeline is busy + Multi-context
stall_sleepingPercentage of stalls occurring because warp was sleepingMulti-context
stall_syncPercentage of stalls occurring because the warp is blocked at a __syncthreads() + call + Multi-context
stall_texturePercentage of stalls occurring because the texture sub-system is fully utilized + or has too many outstanding requests + Multi-context
surface_atomic_requestsTotal number of surface atomic(Atom and Atom CAS) requests from + Multiprocessor + Multi-context
surface_load_requestsTotal number of surface load requests from MultiprocessorMulti-context
surface_reduction_requestsTotal number of surface reduction requests from MultiprocessorMulti-context
surface_store_requestsTotal number of surface store requests from MultiprocessorMulti-context
sysmem_read_bytesNumber of bytes read from system memoryMulti-context
sysmem_read_throughputSystem memory read throughputMulti-context
sysmem_read_transactionsNumber of system memory read transactionsMulti-context
sysmem_read_utilizationThe read utilization level of the system memory relative to the peak utilization + on a scale of 0 to 10 + Multi-context
sysmem_utilizationThe utilization level of the system memory relative to the peak utilization on a + scale of 0 to 10 + Multi-context
sysmem_write_bytesNumber of bytes written to system memoryMulti-context
sysmem_write_throughputSystem memory write throughputMulti-context
sysmem_write_transactionsNumber of system memory write transactionsMulti-context
sysmem_write_utilizationThe write utilization level of the system memory relative to the peak + utilization on a scale of 0 to 10 + Multi-context
tensor_precision_fu_utilizationThe utilization level of the multiprocessor function units that execute tensor + core instructions on a scale of 0 to 10 + Multi-context
tensor_int_fu_utilizationThe utilization level of the multiprocessor function units that execute tensor + core int8 instructions on a scale of 0 to 10. + This metric is only available for device with compute capability 7.2. + Multi-context
tex_cache_hit_rateUnified cache hit rateMulti-context
tex_cache_throughputUnified cache to Multiprocessor read throughputMulti-context
tex_cache_transactionsUnified cache to Multiprocessor read transactionsMulti-context
tex_fu_utilizationThe utilization level of the multiprocessor function units that execute global, + local and texture memory instructions on a scale of 0 to 10 + Multi-context
tex_utilizationThe utilization level of the unified cache relative to the peak utilization on a + scale of 0 to 10 + Multi-context
texture_load_requestsTotal number of texture Load requests from MultiprocessorMulti-context
warp_execution_efficiencyRatio of the average active threads per warp to the maximum number of threads + per warp supported on a multiprocessor + Multi-context
warp_nonpred_execution_efficiencyRatio of the average active threads per warp executing non-predicated + instructions to the maximum number of threads per warp supported on a + multiprocessor + Multi-context
+
+
+
+
+
+
+

2.7. CUPTI Profiling API

+
+
+

Starting with CUDA 10.0, a new set of metric APIs was added for devices with compute + capability 7.0 and higher. These APIs provide low and deterministic profiling overhead on the + target system. They are supported on all CUDA-supported platforms except Android, and + are not supported under MPS (Multi-Process Service), Confidential Compute, + or SLI-configured systems. To determine whether a device is compatible with this + API, the function cuptiProfilerDeviceSupported was introduced in CUDA 11.5; + it exposes overall Profiling API support and specific requirements for a given device. + The Profiling API must be initialized by calling cuptiProfilerInitialize before + testing device support. + +

+
This section covers the performance profiling Host and Target APIs for CUDA. Broadly, the + profiling APIs are divided into the following four sections: +
    +
  • Enumeration + (Host) +
  • +
  • Configuration (Host)
  • +
  • Collection (Target)
  • +
  • Evaluation (Host)
  • +
+ Host APIs provide a metric + interface for enumeration, configuration and evaluation that doesn't require a + compute (GPU) device, and can also run in an offline mode. In the samples section under + extensions, the profiler host + utility covers the usage of host APIs. Target APIs are used for data collection of the + metrics and require a compute (GPU) device. Refer to the samples autorange_profiling and + userrange_profiling for usage of the profiling APIs. +
+

The list of metrics has been overhauled from earlier generation metrics and event APIs, to + support a standard naming convention based upon + unit__(subunit?)_(pipestage?)_quantity_qualifiers

+
+
+
+

2.7.1. Multi Pass Collection

+
+
+

NVIDIA GPU hardware has a limited number of counter registers and cannot collect all + possible counters concurrently. There are also limitations on which counters can be + collected together in a single pass. This + is resolved by replaying the exact same set of GPU workloads multiple times, where + each replay is termed a pass. On + each pass, a different subset of the requested counters is collected. Once all passes + are collected, the data is available for evaluation. Certain metrics have many counters as inputs; adding a single metric + may require many + passes to collect. CUPTI APIs support multi-pass collection through different collection attributes. +

+

The sample cupti_metric_properties + shows how to query the number of passes required to collect a set of counters. +

+
+
+
+
+

2.7.2. Range Profiling

+
+
+

+ Each profiling session runs a series of replay passes, where each pass contains a sequence of ranges. + Every metric enabled in the session's configuration is collected separately per unique range-stack in the pass. + CUPTI supports auto and user defined ranges. + +

+
+
+
+
2.7.2.1. Auto Range
+
+
+

In a session with auto range mode, ranges are defined around each kernel + automatically with a unique name assigned to each range, while profiling is + enabled. This mode is useful for tight metric collection around each kernel. A user + can choose one of the supported replay modes; pseudo code for each is described + below: +

+
+
+
Kernel Replay
+
The replay logic (multiple passes, if needed) is done by CUPTI implicitly (opaque to the + user), and usage of the CUPTI replay APIs cuptiProfilerBeginPass and + cuptiProfilerEndPass will be a no-op in this mode. This mode is + useful for collecting metrics around a kernel in tight control. Each kernel launch + is synchronized to segregate its metrics into a separate range, and a CPU-GPU sync + is made to ensure the profiled data is collected from the GPU. Counter collection can be + enabled and disabled with cuptiProfilerEnableProfiling and + cuptiProfilerDisableProfiling. Refer to the sample autorange_profiling.
+/*
+ * Pseudo code: metric collection in auto range mode with kernel replay.
+ * Assume inputs (counterDataImagePrefix and configImage) from the configuration phase at host.
+ * kernelA..kernelE, grid, tids and num_ranges are placeholders defined by the application.
+ */
+void Collection(std::vector<uint8_t>& counterDataImagePrefix, std::vector<uint8_t>& configImage)
+{
+	CUpti_Profiler_Initialize_Params profilerInitializeParams = { CUpti_Profiler_Initialize_Params_STRUCT_SIZE };
+	cuptiProfilerInitialize(&profilerInitializeParams);
+
+	// CreateCounterDataImage is a helper that is not shown here; presumably it sizes and
+	// initializes both buffers from the prefix -- confirm against the sample.
+	std::vector<uint8_t> counterDataImage;
+	std::vector<uint8_t> counterDataScratchBuffer;
+	CreateCounterDataImage(counterDataImage, counterDataScratchBuffer, counterDataImagePrefix);
+
+	CUpti_Profiler_BeginSession_Params beginSessionParams = { CUpti_Profiler_BeginSession_Params_STRUCT_SIZE };
+	CUpti_ProfilerRange profilerRange = CUPTI_AutoRange;
+	CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_KernelReplay;
+
+	beginSessionParams.ctx = NULL;
+	beginSessionParams.counterDataImageSize = counterDataImage.size();
+	beginSessionParams.pCounterDataImage = &counterDataImage[0];
+	beginSessionParams.counterDataScratchBufferSize = counterDataScratchBuffer.size();
+	beginSessionParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0];
+	beginSessionParams.range = profilerRange;
+	beginSessionParams.replayMode = profilerReplayMode;
+	beginSessionParams.maxRangesPerPass = num_ranges;
+	beginSessionParams.maxLaunchesPerPass = num_ranges;
+
+	cuptiProfilerBeginSession(&beginSessionParams);
+
+	CUpti_Profiler_SetConfig_Params setConfigParams = { CUpti_Profiler_SetConfig_Params_STRUCT_SIZE };
+	setConfigParams.pConfig = &configImage[0];
+	setConfigParams.configSize = configImage.size();
+
+	cuptiProfilerSetConfig(&setConfigParams);
+
+	kernelA <<<grid, tids >>>(...);                  // KernelA not profiled
+
+	// Only kernels launched between EnableProfiling and DisableProfiling are profiled.
+	CUpti_Profiler_EnableProfiling_Params enableProfilingParams = { CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE };
+	cuptiProfilerEnableProfiling(&enableProfilingParams);
+	{
+
+		kernelB <<<grid, tids >>>(...);              // KernelB profiled and captured in a unique range.
+		kernelC <<<grid, tids >>>(...);              // KernelC profiled and captured in a unique range.
+		kernelD <<<grid, tids >>>(...);              // KernelD profiled and captured in a unique range.
+	}
+
+	CUpti_Profiler_DisableProfiling_Params disableProfilingParams = { CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE };
+	cuptiProfilerDisableProfiling(&disableProfilingParams);
+
+	kernelE <<<grid, tids >>>(...);                  // KernelE not profiled
+
+	CUpti_Profiler_UnsetConfig_Params unsetConfigParams = { CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE };
+	cuptiProfilerUnsetConfig(&unsetConfigParams);
+
+	CUpti_Profiler_EndSession_Params endSessionParams = { CUpti_Profiler_EndSession_Params_STRUCT_SIZE };
+	cuptiProfilerEndSession(&endSessionParams);
+}
+                
+
+
+
User Replay
+
The replay (multiple passes, if needed) is done by the user using the replay APIs + cuptiProfilerBeginPass and + cuptiProfilerEndPass. It is the user's responsibility to flush the + counter data using cuptiProfilerFlushCounterData before ending the + session to ensure collection of metric data on the CPU. Counter collection can be + enabled and disabled with cuptiProfilerEnableProfiling and + cuptiProfilerDisableProfiling. Refer to the sample autorange_profiling.
+    /*
+     * Pseudo code: metric collection in auto range mode with user replay.
+     * Assume inputs (counterDataImagePrefix and configImage) from the configuration phase at host.
+     * kernelA..kernelE, grid, tids and num_ranges are placeholders defined by the application.
+     */
+
+    void Collection(std::vector<uint8_t>& counterDataImagePrefix, std::vector<uint8_t>& configImage)
+    {
+        CUpti_Profiler_Initialize_Params profilerInitializeParams = {CUpti_Profiler_Initialize_Params_STRUCT_SIZE};
+        cuptiProfilerInitialize(&profilerInitializeParams);
+
+        // CreateCounterDataImage is a helper that is not shown here; presumably it sizes and
+        // initializes both buffers from the prefix -- confirm against the sample.
+        std::vector<uint8_t> counterDataImage;
+        std::vector<uint8_t> counterDataScratchBuffer;
+        CreateCounterDataImage(counterDataImage, counterDataScratchBuffer, counterDataImagePrefix);
+
+        CUpti_Profiler_BeginSession_Params beginSessionParams = {CUpti_Profiler_BeginSession_Params_STRUCT_SIZE};
+        CUpti_ProfilerRange profilerRange = CUPTI_AutoRange;
+        CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_UserReplay;
+
+        beginSessionParams.ctx = NULL;
+        beginSessionParams.counterDataImageSize = counterDataImage.size();
+        beginSessionParams.pCounterDataImage = &counterDataImage[0];
+        beginSessionParams.counterDataScratchBufferSize = counterDataScratchBuffer.size();
+        beginSessionParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0];
+        beginSessionParams.range = profilerRange;
+        beginSessionParams.replayMode = profilerReplayMode;
+        beginSessionParams.maxRangesPerPass = num_ranges;
+        beginSessionParams.maxLaunchesPerPass = num_ranges;
+
+        cuptiProfilerBeginSession(&beginSessionParams);
+
+        CUpti_Profiler_SetConfig_Params setConfigParams = {CUpti_Profiler_SetConfig_Params_STRUCT_SIZE};
+        setConfigParams.pConfig = &configImage[0];
+        setConfigParams.configSize = configImage.size();
+
+        cuptiProfilerSetConfig(&setConfigParams);
+
+        CUpti_Profiler_FlushCounterData_Params cuptiFlushCounterDataParams = {CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE};
+
+        CUpti_Profiler_EnableProfiling_Params enableProfilingParams = {CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE};
+
+        CUpti_Profiler_DisableProfiling_Params disableProfilingParams = {CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE};
+
+
+        kernelA<<<grid, tids>>>(...);                  // KernelA neither profiled, nor replayed
+
+        CUpti_Profiler_BeginPass_Params beginPassParams = {CUpti_Profiler_BeginPass_Params_STRUCT_SIZE};
+        CUpti_Profiler_EndPass_Params endPassParams = {CUpti_Profiler_EndPass_Params_STRUCT_SIZE};
+
+        // A single pass is shown; in user replay mode the user repeats the
+        // BeginPass/EndPass region when multiple passes are needed.
+        cuptiProfilerBeginPass(&beginPassParams);
+        {
+            kernelB<<<grid, tids>>>(...);              // KernelB replayed but not profiled
+
+            cuptiProfilerEnableProfiling(&enableProfilingParams);
+
+            kernelC<<<grid, tids>>>(...);              // KernelC profiled and captured in a unique range.
+            kernelD<<<grid, tids>>>(...);              // KernelD profiled and captured in a unique range.
+
+            cuptiProfilerDisableProfiling(&disableProfilingParams);
+        }
+        cuptiProfilerEndPass(&endPassParams);
+
+        // The counter data must be flushed by the user before the session ends.
+        cuptiProfilerFlushCounterData(&cuptiFlushCounterDataParams);
+
+        kernelE<<<grid, tids>>>(...);                  // KernelE not profiled
+
+        CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE};
+        cuptiProfilerUnsetConfig(&unsetConfigParams);
+
+        CUpti_Profiler_EndSession_Params endSessionParams = {CUpti_Profiler_EndSession_Params_STRUCT_SIZE};
+        cuptiProfilerEndSession(&endSessionParams);
+    }
+
+
+
Application Replay
+

This replay mode is the same as user replay, except that instead of replaying in process, you + replay the whole process again. You will need to update the pass index while setting the + config using cuptiProfilerSetConfig and reload the intermediate + counterDataImage on each pass. +

+
+
+
+
+
2.7.2.2. User Range
+
+
+

In a session with user range mode, ranges are defined by you using + cuptiProfilerPushRange and + cuptiProfilerPopRange. Kernel launches are concurrent + within a range. This mode is useful for metric data collection around a specific + section of code, instead of per-kernel metric collection. Kernel replay is not + supported in user range mode. You are responsible for replay using + cuptiProfilerBeginPass and + cuptiProfilerEndPass. +

+
+
+
User Replay
+
The replay (multiple passes, if needed) is done by the user using the replay APIs + cuptiProfilerBeginPass and + cuptiProfilerEndPass. It is your responsibility to flush the counter + data using cuptiProfilerFlushCounterData before ending the session. + Counter collection can be enabled/disabled with + cuptiProfilerEnableProfiling and + cuptiProfilerDisableProfiling. Refer to the sample userrange_profiling.
+    /*
+     * Pseudo code: metric collection in user range mode with user replay.
+     * Assume inputs (counterDataImagePrefix and configImage) from the configuration phase at host.
+     * kernelA..kernelE, grid, tids and num_ranges are placeholders defined by the application.
+     */
+
+    void Collection(std::vector<uint8_t>& counterDataImagePrefix, std::vector<uint8_t>& configImage)
+    {
+        CUpti_Profiler_Initialize_Params profilerInitializeParams = {CUpti_Profiler_Initialize_Params_STRUCT_SIZE};
+        cuptiProfilerInitialize(&profilerInitializeParams);
+
+        // CreateCounterDataImage is a helper that is not shown here; presumably it sizes and
+        // initializes both buffers from the prefix -- confirm against the sample.
+        std::vector<uint8_t> counterDataImage;
+        std::vector<uint8_t> counterDataScratchBuffer;
+        CreateCounterDataImage(counterDataImage, counterDataScratchBuffer, counterDataImagePrefix);
+
+        CUpti_Profiler_BeginSession_Params beginSessionParams = {CUpti_Profiler_BeginSession_Params_STRUCT_SIZE};
+        CUpti_ProfilerRange profilerRange = CUPTI_UserRange;
+        CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_UserReplay;
+
+        beginSessionParams.ctx = NULL;
+        beginSessionParams.counterDataImageSize = counterDataImage.size();
+        beginSessionParams.pCounterDataImage = &counterDataImage[0];
+        beginSessionParams.counterDataScratchBufferSize = counterDataScratchBuffer.size();
+        beginSessionParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0];
+        beginSessionParams.range = profilerRange;
+        beginSessionParams.replayMode = profilerReplayMode;
+        beginSessionParams.maxRangesPerPass = num_ranges;
+        beginSessionParams.maxLaunchesPerPass = num_ranges;
+
+        cuptiProfilerBeginSession(&beginSessionParams);
+
+        CUpti_Profiler_SetConfig_Params setConfigParams = {CUpti_Profiler_SetConfig_Params_STRUCT_SIZE};
+        setConfigParams.pConfig = &configImage[0];
+        setConfigParams.configSize = configImage.size();
+
+        cuptiProfilerSetConfig(&setConfigParams);
+
+        CUpti_Profiler_FlushCounterData_Params cuptiFlushCounterDataParams = {CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE};
+
+        kernelA<<<grid, tids>>>(...);                  // KernelA neither profiled, nor replayed
+
+        CUpti_Profiler_BeginPass_Params beginPassParams = {CUpti_Profiler_BeginPass_Params_STRUCT_SIZE};
+        CUpti_Profiler_EndPass_Params endPassParams = {CUpti_Profiler_EndPass_Params_STRUCT_SIZE};
+
+        // A single pass is shown; in user replay mode the user repeats the
+        // BeginPass/EndPass region when multiple passes are needed.
+        cuptiProfilerBeginPass(&beginPassParams);
+        {
+            kernelB<<<grid, tids>>>(...);              // KernelB replayed but not profiled
+
+            // The user-defined range "RangeA" starts here and ends at the matching PopRange.
+            CUpti_Profiler_PushRange_Params pushRangeParams = {CUpti_Profiler_PushRange_Params_STRUCT_SIZE};
+            pushRangeParams.pRangeName = "RangeA";
+            cuptiProfilerPushRange(&pushRangeParams);
+
+            kernelC<<<grid, tids>>>(...);
+            kernelD<<<grid, tids>>>(...);
+
+            CUpti_Profiler_PopRange_Params popRangeParams = {CUpti_Profiler_PopRange_Params_STRUCT_SIZE};
+            cuptiProfilerPopRange(&popRangeParams);     // Kernel C and Kernel D are captured in RangeA without any serialization introduced by profiler
+        }
+        cuptiProfilerEndPass(&endPassParams);
+        // The counter data must be flushed by the user before the session ends.
+        cuptiProfilerFlushCounterData(&cuptiFlushCounterDataParams);
+
+        kernelE<<<grid, tids>>>(...);                  // KernelE not profiled
+
+        CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE};
+        cuptiProfilerUnsetConfig(&unsetConfigParams);
+
+        CUpti_Profiler_EndSession_Params endSessionParams = {CUpti_Profiler_EndSession_Params_STRUCT_SIZE};
+        cuptiProfilerEndSession(&endSessionParams);
+    }
+
+
+
Application Replay
+

This replay mode is the same as user replay, except that instead of replaying in process, you + replay the whole process again. You will need to update the pass index while setting the + config using the cuptiProfilerSetConfig API, and reload the intermediate + counterDataImage on each pass. +

+
+
+
+
+
+

2.7.3. CUPTI Profiler Definitions

+
+
+

Definitions of the terms used in this section.

+
+
+ Counter: +
+
The number of occurrences of a specific event on the device. +
+
+ Configuration Image: +
+
A Blob to configure the session for counters to be collected. +
+
+ CounterData Image: +
+
A Blob which contains the values of collected counters
+
+ CounterData Prefix: +
+
A metadata header for CounterData Image
+
+ Device: +
+
A physical NVIDIA GPU.
+
+ Event: +
+
An event is a countable activity, action, or occurrence on device. +
+
+ Metric: +
+
A high-level value derived from counter values. +
+
+ Pass: +
+
A repeatable set of operations, with consistently labeled ranges. +
+
+ Range: +
+
A labeled region of execution
+
+ Replay: +
+
Performing the repeatable set of operations.
+
+ Session: +
+
A profiling session where GPU resources needed for profiling are allocated. + The profiler is in armed state at session boundaries, and power management may be + disabled at session boundaries. Outside of a session, the GPU will return to its + normal operating state. +
+
+
+
+
+
+

2.7.4. Differences from event and metric APIs

+
+
+
The following features are supported by the event and metric APIs but + are not available with the Profiling API: + +
    +
  • Continuous mode or sampling of the metrics.
  • +
  • The Profiling API provides the closest equivalent metrics for most of the events and metrics supported + by the event and metric APIs. However, there are some events and metrics, for example NVLink + performance metrics, for which there are no equivalent metrics in the Profiling API. + The tables Metrics Mapping Table + and Events Mapping Table can + be referred to in order to find the equivalent Perfworks metrics for compute capability 7.0. +
  • +
  • Per-instance metrics i.e. users can't collect metrics for each instance of the + hardware units like SM, FB etc separately. However Profiling API provides sub-metrics + which can be used to get the avg/sum/min/max across all instances of a hardware unit. +
  • +
+
+
+
+
+
+
+

2.8. Perfworks Metric API

+
+
+

Introduction:

+

+ The Perfworks Metric API supports the enumeration, configuration and evaluation of metrics. The binary outputs of the configuration + phase are inputs to the CUPTI Range Profiling API. The output of + Range Profiling is the CounterData, which is passed to the Derived Metrics Evaluation APIs. + +

+

+ GPU Metrics are generally presented as counts, ratios and percentages. The underlying values collected from hardware are + raw counters (analogous to CUPTI events), but those details are hidden behind derived metric formulas. + +

+

+ The Metric APIs are split into two layers: Derived Metrics and Raw Metrics. + Derived Metrics contains the list of named metrics and performs evaluation to numeric results, serving a similar purpose as + the previous CUPTI Metric API. + Most user interaction will be with derived metrics. + Raw Metrics contains the list of raw counters and generates configuration file images analogous to + the previous CUPTI Event API. + +

+
+
+

Metric Enumeration

+

+ Metric Enumeration is the process of listing available counters and metrics. + +

+

+ Refer to file List.cpp used by the + cupti_metric_properties sample. + +

+

+ Metrics are grouped into three types: counters, ratios and throughputs. Except for the ratio metric type, each metric has four + types of sub-metrics, also known as rollup metrics: sum, avg, min and max. + +

+
+ For enumerating supported metrics for a chip, we need to calculate the scratch buffer needed for host operation and to initialize + the Metric Evaluator. + +
    +
  • Call NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize for calculating scratch buffer size required for allocating memory for host operations. +
  • +
  • Call NVPW_CUDA_MetricsEvaluator_Initialize for initializing the Metrics Evaluator which creates a NVPW_MetricsEvaluator object. +
  • +
+ + The outline for enumerating supported counter metrics for a chip: + +
    +
  • Call NVPW_MetricsEvaluator_GetMetricNames for NVPW_METRIC_TYPE_COUNTER metric type for listing all the counter metrics supported. +
  • +
  • Call NVPW_MetricsEvaluator_GetSupportedSubmetrics to list all the sub-metric supported for NVPW_METRIC_TYPE_COUNTER metric type. +
  • +
  • Call NVPW_MetricsEvaluator_GetCounterProperties to give description of the counter and the collection hardware unit. +
  • +
+ + Similarly, for enumerating ratio and throughput metrics we need to pass NVPW_METRIC_TYPE_RATIO and NVPW_METRIC_TYPE_THROUGHPUT as metric types to NVPW_MetricsEvaluator_GetMetricNames and NVPW_MetricsEvaluator_GetSupportedSubmetrics. + +
+

+ For more details about the metric properties call NVPW_MetricsEvaluator_GetRatioMetricProperties and NVPW_MetricsEvaluator_GetThroughputMetricProperties respectively. + +

+
+
+

Configuration Workflow

+

+ Configuration is the process of specifying the metrics that will be collected and how those metrics should be collected. + The inputs for this phase are the metric names and metric collection properties. The output for this phase is a ConfigImage + and a CounterDataPrefix Image. + +

+

+ Refer to file Metric.cpp used by the + userrange_profiling sample. + +

+
+ The outline for configuring metrics: + +
    +
  • As input, take a list of metric names.
  • +
  • Before creating ConfigImage or CounterDataPrefixImage, we need a list of NVPA_RawMetricRequest for the metrics listed for + collection. + +
      +
    • We need to calculate the scratch buffer size required for the host operation and to initialize the Metric Evaluator like in + the Enumeration phase. +
    • +
    • For each metric, Call NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest for creating a NVPW_MetricEvalRequest. +
    • +
    • Call NVPW_MetricsEvaluator_GetMetricRawDependencies which takes the NVPW_MetricsEvaluator and NVPW_MetricEvalRequest as input, for getting raw dependencies for given metrics. +
    • +
    +
  • +
  • Create an NVPA_RawMetricRequest with keepInstances=true and isolated=true
  • +
  • Pass the NVPA_RawMetricRequest to NVPW_RawMetricsConfig_AddMetrics for the ConfigImage. +
  • +
  • Pass the NVPA_RawMetricRequest to NVPW_CounterDataBuilder_AddMetrics for the CounterDataPrefix. +
  • +
  • Generate binary configuration "images" (file format in memory): + +
      +
    • ConfigImage from NVPW_RawMetricsConfig_GetConfigImage
    • +
    • CounterDataPrefix from NVPW_CounterDataBuilder_GetCounterDataPrefix
    • +
    +
  • +
+
+
+
+

Metric Evaluation

+

+ Metric Evaluation is the process of forming metrics from the counters stored in the CounterData image. + +

+

+ Refer to file Eval.cpp used by the + userrange_profiling sample. + +

+
+ The outline for evaluating metrics: + +
    +
  • As input, take the same list of metric names as used during configuration.
  • +
  • As input, take a CounterDataImage collected on a target device. +
  • +
  • We need to calculate the scratch buffer size required for the host operation and to initialize the Metric Evaluator like in + the Enumeration phase. +
  • +
  • Query the number of ranges collected via NVPW_CounterData_GetNumRanges. +
  • +
  • For each metric: + +
      +
    • Call NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest for creating NVPW_MetricEvalRequest
    • +
    • For each range: + +
        +
      • Call NVPW_Profiler_CounterData_GetRangeDescriptions to retrieve the range's description, originally set by cuptiProfilerPushRange. +
      • +
      • Call NVPW_MetricsEvaluator_SetDeviceAttributes to set the current range for evaluation on the NVPW_MetricEvalRequest. +
      • +
      • Call NVPW_MetricsEvaluator_EvaluateToGpuValues to query an array of numeric values corresponding to each input metric. +
      • +
      +
    • +
    +
  • +
+
+
+
+
+

2.8.1. Derived metrics

+
+
+

Metrics Overview

+

+ The PerfWorks API comes with an advanced metrics calculation system, designed to help you determine what happened (counters + and metrics), + and how close the program reached to peak GPU performance (throughputs as a percentage). Every counter has associated peak + rates in the database, + to allow computing its throughput as a percentage. + +

+

+ Throughput metrics return the maximum percentage value of their constituent counters. Constituents can be programmatically + queried via + NVPW_MetricsEvaluator_GetMetricNames with NVPW_METRIC_TYPE_THROUGHPUT as metric types. These constituents have been carefully selected to represent the sections + of the GPU pipeline that govern peak performance. While all counters can be converted to a %-of-peak, not all counters are + suitable for + peak-performance analysis; examples of unsuitable counters include qualified subsets of activity, and workload residency counters. + Using throughput metrics ensures meaningful and actionable analysis. + +

+

+ Two types of peak rates are available for every counter: burst and sustained. + Burst rate is the maximum rate reportable in a single clock cycle. + Sustained rate is the maximum rate achievable over an infinitely long measurement period, for "typical" operations. + For many counters, burst == sustained. Since the burst rate cannot be exceeded, percentages of burst rate will never + exceed 100%. + Percentages of sustained rate can occasionally exceed 100% in edge cases. Burst metrics are only supported with MetricsContext + APIs and these will be deprecated in a future CUDA release. These metrics are not supported with NVPW_MetricsEvaluator APIs. + +

+
+
+

Metrics Entities

+
+ The Metrics layer has 3 major types of entities: + +
    +
  • Metrics : these are calculated quantities, with the following static properties: + +
      +
    • Description string.
    • +
    • Dimensional Units : a list of ('name', exponent) in the style of dimensional analysis. Example string representation: pixels / gpc_clk. +
    • +
    • Raw Metric dependencies : the list of raw metrics that must be collected, in order to evaluate the metric.
    • +
    • Every metric has the following sub-metrics built in. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      .peak_sustained the peak sustained rate
      .peak_sustained_active the peak sustained rate during unit active cycles
      .peak_sustained_active.per_second the peak sustained rate during unit active cycles, per second *
      .peak_sustained_elapsed the peak sustained rate during unit elapsed cycles
      .peak_sustained_elapsed.per_second the peak sustained rate during unit elapsed cycles, per second *
      .peak_sustained_region the peak sustained rate over a user-specified "range"
      .peak_sustained_region.per_second the peak sustained rate over a user-specified "range", per second *
      .peak_sustained_frame the peak sustained rate over a user-specified "frame"
      .peak_sustained_frame.per_second the peak sustained rate over a user-specified "frame", per second *
      .per_cycle_active the number of operations per unit active cycle
      .per_cycle_elapsed the number of operations per unit elapsed cycle
      .per_cycle_in_region the number of operations per user-specified "range" cycle
      .per_cycle_in_frame the number of operations per user-specified "frame" cycle
      .per_second the number of operations per second
      .pct_of_peak_sustained_active % of peak sustained rate achieved during unit active cycles
      .pct_of_peak_sustained_elapsed % of peak sustained rate achieved during unit elapsed cycles
      .pct_of_peak_sustained_region % of peak sustained rate achieved over a user-specified "range" time
      .pct_of_peak_sustained_frame % of peak sustained rate achieved over a user-specified "frame" time
      + + * sub-metrics added in CUPTI 11.3. + +
    • +
    +
  • +
  • Counters may be either a raw counter from the GPU, or a calculated counter value. Every counter has four sub-metrics under it, which + are also called roll-ups: + + + + + + + + + + + + + + + + + + +
    .sumThe sum of counter values across all unit instances.
    .avgThe average counter value across all unit instances.
    .minThe minimum counter value across all unit instances.
    .maxThe maximum counter value across all unit instances.
    +
  • +
  • Ratios have three sub-metrics under it: + + + + + + + + + + + + + + +
    .pctThe value expressed as a percentage.
    .ratioThe value expressed as a ratio.
    .max_rateThe ratio's maximum value.
    +
  • +
  • Throughputs indicate how close a portion of the GPU reached to peak rate. Every throughput has the following sub-metrics: + + + + + + + + + + + + + + + + + + +
    .pct_of_peak_sustained_active % of peak sustained rate achieved during unit active cycles
    .pct_of_peak_sustained_elapsed % of peak sustained rate achieved during unit elapsed cycles
    .pct_of_peak_sustained_region % of peak sustained rate achieved over a user-specified "range" time
    .pct_of_peak_sustained_frame % of peak sustained rate achieved over a user-specified "frame" time
    +
  • +
+ + + At the configuration step, you must specify metric names. Counters, ratios, and throughputs are not directly schedulable. + +
+

Note: Burst metrics are only supported with MetricsContext APIs. + +

+
+ From CUPTI 11.3 onwards, due to not being useful for performance optimization following counter sub-metrics are not present in MetricEvaluator APIs and are only supported with MetricsContext APIs: + + + + + + + + + + + + + + + + + + + + + + +
.peak_burst the peak burst rate
.pct_of_peak_burst_active % of peak burst rate achieved during unit active cycles
.pct_of_peak_burst_elapsed % of peak burst rate achieved during unit elapsed cycles
.pct_of_peak_burst_region % of peak burst rate achieved over a user-specified "range"
.pct_of_peak_burst_frame % of peak burst rate achieved over a user-specified "frame"
+
+
+ From CUPTI 11.3 onwards, due to not being useful for performance optimization following throughput sub-metrics are not present in MetricEvaluator APIs and are only supported with MetricsContext APIs: + + + + + + + + + + + + + + + + + + +
.pct_of_peak_burst_active % of peak burst rate achieved during unit active cycles
.pct_of_peak_burst_elapsed % of peak burst rate achieved during unit elapsed cycles
.pct_of_peak_burst_region % of peak burst rate achieved over a user-specified "range" time
.pct_of_peak_burst_frame % of peak burst rate achieved over a user-specified "frame" time
+
+
+
+

Metrics Examples

+
+
+## non-metric names -- *not* directly evaluable
+sm__inst_executed                   # counter
+smsp__average_warp_latency          # ratio
+sm__throughput                      # throughput
+
+## a counter's four roll-ups as sub-metrics -- all evaluable
+sm__inst_executed.sum               # metric
+sm__inst_executed.avg               # metric
+sm__inst_executed.min               # metric
+sm__inst_executed.max               # metric
+
+## all names below are metrics -- all evaluable
+l1tex__data_bank_conflicts_pipe_lsu.sum
+l1tex__data_bank_conflicts_pipe_lsu.sum.peak_burst
+l1tex__data_bank_conflicts_pipe_lsu.sum.peak_sustained
+l1tex__data_bank_conflicts_pipe_lsu.sum.per_cycle_active
+l1tex__data_bank_conflicts_pipe_lsu.sum.per_cycle_elapsed
+l1tex__data_bank_conflicts_pipe_lsu.sum.per_cycle_in_region
+l1tex__data_bank_conflicts_pipe_lsu.sum.per_cycle_in_frame
+l1tex__data_bank_conflicts_pipe_lsu.sum.per_second
+l1tex__data_bank_conflicts_pipe_lsu.sum.pct_of_peak_sustained_active
+l1tex__data_bank_conflicts_pipe_lsu.sum.pct_of_peak_sustained_elapsed
+l1tex__data_bank_conflicts_pipe_lsu.sum.pct_of_peak_sustained_region
+l1tex__data_bank_conflicts_pipe_lsu.sum.pct_of_peak_sustained_frame
+
+                
+
+
+

Metrics Naming Conventions

+
+ Counters and metrics _generally_ obey the naming scheme: + +
    +
  • Unit-Level Counter : unit__(subunit?)_(pipestage?)_quantity_(qualifiers?)
  • +
  • Interface Counter : unit__(subunit?)_(pipestage?)_(interface)_quantity_(qualifiers?)
  • +
  • Unit Metric : (counter_name).(rollup_metric)
  • +
  • Sub-Metric : (counter_name).(rollup_metric).(submetric)
  • +
+ + where + +
    +
  • unit: A logical or physical unit of the GPU + +
  • +
  • subunit: The subunit within the unit where the counter was measured. Sometimes this is a pipeline mode instead. + +
  • +
  • pipestage: The pipeline stage within the subunit where the counter was measured. + +
  • +
  • quantity: What is being measured. Generally matches the "dimensional units". + +
  • +
  • qualifiers: Any additional predicates or filters applied to the counter. Often, an unqualified counter can be broken down + into several qualified sub-components. +
  • +
  • interface: Of the form sender2receiver, where sender is the source-unit and receiver is the destination-unit. +
  • +
  • rollup_metric: One of sum,avg,min,max. + +
  • +
  • submetric: refer to section Metrics Entities
  • +
+ + Components are not always present. Most top-level counters have no qualifiers. Subunit and pipestage may be absent where + irrelevant, or there may be many subunit specifiers for detailed counters. + +
+
+
+

Cycle Metrics

+
+ Counters using the term cycles in the name report the number of cycles in the unit's clock domain. Unit-level cycle metrics include: + +
    +
  • unit__cycles_elapsed : The number of cycles within a range. The cycles' DimUnits are specific to the unit's clock domain. + +
  • +
  • unit__cycles_active : The number of cycles where the unit was processing data. + +
  • +
  • unit__cycles_stalled : The number of cycles where the unit was unable to process new data because its output interface was blocked. + +
  • +
  • unit__cycles_idle : The number of cycles where the unit was idle. + +
  • +
+ + Interface-level cycle counters are often (not always) available in the following variations: + +
    +
  • unit__(interface)_active : Cycles where data was transferred from source-unit to destination-unit. + +
  • +
  • unit__(interface)_stalled : Cycles where the source-unit had data, but the destination-unit was unable to accept data. + +
  • +
+
+
+
+
+
+

2.8.2. Raw Metrics

+
+
+

+ The raw metrics layer contains a list of low-level GPU counters, and the "scheduling" logic + needed to program the hardware. The binary output files (ConfigImage and CounterDataPrefix) + can be generated offline, stored on disk, and used on any compatible GPU. They do not need to be + generated on a machine where a GPU is available. + +

+

+ Refer to Metrics Configuration + to see where Raw Metrics fit into the overall data flow of the profiler. + +

+
+
+
+
+

2.8.3. Metrics Mapping Table

+
+

+ The table below lists the CUPTI metrics for devices with compute capability 7.0. + For each CUPTI metric the closest equivalent Perfworks metric or formula is given. If no equivalent Perfworks metric is available + the column is left blank. + Note that there can be some difference in the metric values between the CUPTI metric and the Perfworks metrics. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 5. Metrics Mapping Table from CUPTI to Perfworks for Compute Capability 7.0
CUPTI MetricPerfworks Metric or Formula
achieved_occupancysm__warps_active.avg.pct_of_peak_sustained_active
atomic_transactionsl1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum + l1tex__t_set_accesses_pipe_lsu_mem_global_op_red.sum
atomic_transactions_per_request(l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum) / (l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum + + l1tex__t_requests_pipe_lsu_mem_global_op_red.sum) +
branch_efficiencysmsp__sass_average_branch_targets_threads_uniform.pct
cf_executedsmsp__inst_executed_pipe_cbu.sum + smsp__inst_executed_pipe_adu.sum
cf_fu_utilization
cf_issued
double_precision_fu_utilizationsmsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active
dram_read_bytesdram__bytes_read.sum
dram_read_throughputdram__bytes_read.sum.per_second
dram_read_transactionsdram__sectors_read.sum
dram_utilizationdram__throughput.avg.pct_of_peak_sustained_elapsed
dram_write_bytesdram__bytes_write.sum
dram_write_throughputdram__bytes_write.sum.per_second
dram_write_transactionsdram__sectors_write.sum
eligible_warps_per_cyclesmsp__warps_eligible.sum.per_cycle_active
flop_count_dpsmsp__sass_thread_inst_executed_op_dadd_pred_on.sum + smsp__sass_thread_inst_executed_op_dmul_pred_on.sum + smsp__sass_thread_inst_executed_op_dfma_pred_on.sum + * 2 +
flop_count_dp_addsmsp__sass_thread_inst_executed_op_dadd_pred_on.sum
flop_count_dp_fmasmsp__sass_thread_inst_executed_op_dfma_pred_on.sum
flop_count_dp_mulsmsp__sass_thread_inst_executed_op_dmul_pred_on.sum
flop_count_hpsmsp__sass_thread_inst_executed_op_hadd_pred_on.sum + smsp__sass_thread_inst_executed_op_hmul_pred_on.sum + smsp__sass_thread_inst_executed_op_hfma_pred_on.sum + * 2 +
flop_count_hp_addsmsp__sass_thread_inst_executed_op_hadd_pred_on.sum
flop_count_hp_fmasmsp__sass_thread_inst_executed_op_hfma_pred_on.sum
flop_count_hp_mulsmsp__sass_thread_inst_executed_op_hmul_pred_on.sum
flop_count_spsmsp__sass_thread_inst_executed_op_fadd_pred_on.sum + smsp__sass_thread_inst_executed_op_fmul_pred_on.sum + smsp__sass_thread_inst_executed_op_ffma_pred_on.sum + * 2 +
flop_count_sp_addsmsp__sass_thread_inst_executed_op_fadd_pred_on.sum
flop_count_sp_fmasmsp__sass_thread_inst_executed_op_ffma_pred_on.sum
flop_count_sp_mulsmsp__sass_thread_inst_executed_op_fmul_pred_on.sum
flop_count_sp_special 
flop_dp_efficiencysmsp__sass_thread_inst_executed_ops_dadd_dmul_dfma_pred_on.avg.pct_of_peak_sustained_elapsed
flop_hp_efficiencysmsp__sass_thread_inst_executed_ops_hadd_hmul_hfma_pred_on.avg.pct_of_peak_sustained_elapsed
flop_sp_efficiencysmsp__sass_thread_inst_executed_ops_fadd_fmul_ffma_pred_on.avg.pct_of_peak_sustained_elapsed
gld_efficiencysmsp__sass_average_data_bytes_per_sector_mem_global_op_ld.pct
gld_requested_throughput 
gld_throughputl1tex__t_bytes_pipe_lsu_mem_global_op_ld.sum.per_second
gld_transactionsl1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum
gld_transactions_per_requestl1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio
global_atomic_requestsl1tex__t_requests_pipe_lsu_mem_global_op_atom.sum
global_hit_ratel1tex__t_sectors_pipe_lsu_mem_global_op_{op}_lookup_hit.sum / l1tex__t_sectors_pipe_lsu_mem_global_op_{op}.sum
global_load_requestsl1tex__t_requests_pipe_lsu_mem_global_op_ld.sum
global_reduction_requestsl1tex__t_requests_pipe_lsu_mem_global_op_red.sum
global_store_requestsl1tex__t_requests_pipe_lsu_mem_global_op_st.sum
gst_efficiencysmsp__sass_average_data_bytes_per_sector_mem_global_op_st.pct
gst_requested_throughput 
gst_throughputl1tex__t_bytes_pipe_lsu_mem_global_op_st.sum.per_second
gst_transactionsl1tex__t_sectors_pipe_lsu_mem_global_op_st.sum
gst_transactions_per_requestl1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio
half_precision_fu_utilizationsmsp__inst_executed_pipe_fp16.avg.pct_of_peak_sustained_active
inst_bit_convertsmsp__sass_thread_inst_executed_op_conversion_pred_on.sum
inst_compute_ld_stsmsp__sass_thread_inst_executed_op_memory_pred_on.sum
inst_controlsmsp__sass_thread_inst_executed_op_control_pred_on.sum
inst_executedsmsp__inst_executed.sum
inst_executed_global_atomicssmsp__sass_inst_executed_op_global_atom.sum
inst_executed_global_loadssmsp__inst_executed_op_global_ld.sum
inst_executed_global_reductionssmsp__inst_executed_op_global_red.sum
inst_executed_global_storessmsp__inst_executed_op_global_st.sum
inst_executed_local_loadssmsp__inst_executed_op_local_ld.sum
inst_executed_local_storessmsp__inst_executed_op_local_st.sum
inst_executed_shared_atomicssmsp__inst_executed_op_shared_atom.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum
inst_executed_shared_loadssmsp__inst_executed_op_shared_ld.sum
inst_executed_shared_storessmsp__inst_executed_op_shared_st.sum
inst_executed_surface_atomicssmsp__inst_executed_op_surface_atom.sum
inst_executed_surface_loadssmsp__inst_executed_op_surface_ld.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum
inst_executed_surface_reductionssmsp__inst_executed_op_surface_red.sum
inst_executed_surface_storessmsp__inst_executed_op_surface_st.sum
inst_executed_tex_opssmsp__inst_executed_op_texture.sum
inst_fp_16smsp__sass_thread_inst_executed_op_fp16_pred_on.sum
inst_fp_32smsp__sass_thread_inst_executed_op_fp32_pred_on.sum
inst_fp_64smsp__sass_thread_inst_executed_op_fp64_pred_on.sum
inst_integersmsp__sass_thread_inst_executed_op_integer_pred_on.sum
inst_inter_thread_communicationsmsp__sass_thread_inst_executed_op_inter_thread_communication_pred_on.sum
inst_issuedsmsp__inst_issued.sum
inst_miscsmsp__sass_thread_inst_executed_op_misc_pred_on.sum
inst_per_warpsmsp__average_inst_executed_per_warp.ratio
inst_replay_overhead
ipcsmsp__inst_executed.avg.per_cycle_active
issue_slot_utilizationsmsp__issue_active.avg.pct_of_peak_sustained_active
issue_slotssmsp__inst_issued.sum
issued_ipcsmsp__inst_issued.avg.per_cycle_active
l1_sm_lg_utilizationl1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active
l2_atomic_throughputlts__t_sectors_srcunit_l1_op_atom.sum.per_second
l2_atomic_transactionslts__t_sectors_srcunit_l1_op_atom.sum
l2_global_atomic_store_byteslts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_atom.sum
l2_global_load_byteslts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_ld.sum
l2_local_global_store_byteslts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_st.sum + lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_st.sum
l2_local_load_byteslts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_ld.sum
l2_read_throughputlts__t_sectors_op_read.sum.per_second
l2_read_transactionslts__t_sectors_op_read.sum
l2_surface_load_byteslts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_ld.sum
l2_surface_store_byteslts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_st.sum
l2_tex_hit_ratelts__t_sector_hit_rate.pct
l2_tex_read_hit_ratelts__t_sector_op_read_hit_rate.pct
l2_tex_read_throughputlts__t_sectors_srcunit_tex_op_read.sum.per_second
l2_tex_read_transactionslts__t_sectors_srcunit_tex_op_read.sum
l2_tex_write_hit_ratelts__t_sector_op_write_hit_rate.pct
l2_tex_write_throughputlts__t_sectors_srcunit_tex_op_write.sum.per_second
l2_tex_write_transactionslts__t_sectors_srcunit_tex_op_write.sum
l2_utilizationlts__t_sectors.avg.pct_of_peak_sustained_elapsed
l2_write_throughputlts__t_sectors_op_write.sum.per_second
l2_write_transactionslts__t_sectors_op_write.sum
ldst_executed
ldst_fu_utilizationsmsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active
ldst_issued
local_hit_rate
local_load_requestsl1tex__t_requests_pipe_lsu_mem_local_op_ld.sum
local_load_throughputl1tex__t_bytes_pipe_lsu_mem_local_op_ld.sum.per_second
local_load_transactionsl1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum
local_load_transactions_per_requestl1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_ld.ratio
local_memory_overhead
local_store_requestsl1tex__t_requests_pipe_lsu_mem_local_op_st.sum
local_store_throughputl1tex__t_bytes_pipe_lsu_mem_local_op_st.sum.per_second
local_store_transactionsl1tex__t_sectors_pipe_lsu_mem_local_op_st.sum
local_store_transactions_per_requestl1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_st.ratio
nvlink_data_receive_efficiency
nvlink_data_transmission_efficiency
nvlink_overhead_data_received
nvlink_overhead_data_transmitted
nvlink_receive_throughput
nvlink_total_data_received
nvlink_total_data_transmitted
nvlink_total_nratom_data_transmitted
nvlink_total_ratom_data_transmitted
nvlink_total_response_data_received
nvlink_total_write_data_transmitted
nvlink_transmit_throughput
nvlink_user_data_received
nvlink_user_data_transmitted
nvlink_user_nratom_data_transmitted
nvlink_user_ratom_data_transmitted
nvlink_user_response_data_received
nvlink_user_write_data_transmitted
pcie_total_data_receivedpcie__read_bytes.sum
pcie_total_data_transmittedpcie__write_bytes.sum
shared_efficiencysmsp__sass_average_data_bytes_per_wavefront_mem_shared.pct
shared_load_throughputl1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum.per_second
shared_load_transactionsl1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum
shared_load_transactions_per_request
shared_store_throughputl1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum.per_second
shared_store_transactionsl1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum
shared_store_transactions_per_request
shared_utilizationl1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed
single_precision_fu_utilizationsmsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active
sm_efficiencysmsp__cycles_active.avg.pct_of_peak_sustained_elapsed
sm_tex_utilizationl1tex__texin_sm2tex_req_cycles_active.avg.pct_of_peak_sustained_elapsed
special_fu_utilizationsmsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active
stall_constant_memory_dependencysmsp__warp_issue_stalled_imc_miss_per_warp_active.pct
stall_exec_dependencysmsp__warp_issue_stalled_short_scoreboard_per_warp_active.pct + smsp__warp_issue_stalled_wait_per_warp_active.pct
stall_inst_fetchsmsp__warp_issue_stalled_no_instruction_per_warp_active.pct
stall_memory_dependencysmsp__warp_issue_stalled_long_scoreboard_per_warp_active.pct
stall_memory_throttlesmsp__warp_issue_stalled_drain_per_warp_active.pct + smsp__warp_issue_stalled_lg_throttle_per_warp_active.pct
stall_not_selectedsmsp__warp_issue_stalled_not_selected_per_warp_active.pct
stall_othersmsp__warp_issue_stalled_misc_per_warp_active.pct + smsp__warp_issue_stalled_dispatch_stall_per_warp_active.pct
stall_pipe_busysmsp__warp_issue_stalled_mio_throttle_per_warp_active.pct + smsp__warp_issue_stalled_math_pipe_throttle_per_warp_active.pct
stall_sleepingsmsp__warp_issue_stalled_sleeping_per_warp_active.pct
stall_syncsmsp__warp_issue_stalled_membar_per_warp_active.pct + smsp__warp_issue_stalled_barrier_per_warp_active.pct
stall_texturesmsp__warp_issue_stalled_tex_throttle_per_warp_active.pct
surface_atomic_requestsl1tex__t_requests_pipe_tex_mem_surface_op_atom.sum
surface_load_requestsl1tex__t_requests_pipe_tex_mem_surface_op_ld.sum
surface_reduction_requestsl1tex__t_requests_pipe_tex_mem_surface_op_red.sum
surface_store_requestsl1tex__t_requests_pipe_tex_mem_surface_op_st.sum
sysmem_read_byteslts__t_sectors_aperture_sysmem_op_read.sum * 32
sysmem_read_throughputlts__t_sectors_aperture_sysmem_op_read.sum.per_second
sysmem_read_transactionslts__t_sectors_aperture_sysmem_op_read.sum
sysmem_read_utilization
sysmem_utilization
sysmem_write_byteslts__t_sectors_aperture_sysmem_op_write.sum * 32
sysmem_write_throughputlts__t_sectors_aperture_sysmem_op_write.sum.per_second
sysmem_write_transactionslts__t_sectors_aperture_sysmem_op_write.sum
sysmem_write_utilization
tensor_precision_fu_utilizationsm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_active
tex_cache_hit_ratel1tex__t_sector_hit_rate.pct
tex_cache_throughput
tex_cache_transactionsl1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active + l1tex__tex_writeback_active.avg.pct_of_peak_sustained_active
tex_fu_utilizationsmsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active
tex_sm_tex_utilizationl1tex__f_tex2sm_cycles_active.avg.pct_of_peak_sustained_elapsed
tex_sm_utilizationsm__mio2rf_writeback_active.avg.pct_of_peak_sustained_elapsed
tex_utilization
texture_load_requestsl1tex__t_requests_pipe_tex_mem_texture.sum
warp_execution_efficiencysmsp__thread_inst_executed_per_inst_executed.ratio
warp_nonpred_execution_efficiencysmsp__thread_inst_executed_per_inst_executed.pct
+
+
+
+
+

2.8.4. Events Mapping Table

+
+

+ The table below lists the CUPTI events for devices with compute capability 7.0. + For each CUPTI event the closest equivalent Perfworks metric or formula is given. If no equivalent Perfworks metric is available + the column is left blank. + Note that there can be some difference in the values between the CUPTI event and the Perfworks metrics. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 6. Events Mapping Table from CUPTI events to Perfworks metrics for Compute Capability 7.0
CUPTI EventPerfworks Metric or Formula
active_cyclessm__cycles_active.sum
active_cycles_pmsm__cycles_active.sum
active_cycles_syssys__cycles_active.sum
active_warpssm__warps_active.sum
active_warps_pmsm__warps_active.sum
atom_countsmsp__inst_executed_op_generic_atom_dot_alu.sum
elapsed_cycles_pmsm__cycles_elapsed.sum
elapsed_cycles_smsm__cycles_elapsed.sum
elapsed_cycles_syssys__cycles_elapsed.sum
fb_subp0_read_sectorsdram__sectors_read.sum
fb_subp1_read_sectorsdram__sectors_read.sum
fb_subp0_write_sectorsdram__sectors_write.sum
fb_subp1_write_sectorsdram__sectors_write.sum
global_atom_cassmsp__inst_executed_op_generic_atom_dot_cas.sum
gred_countsmsp__inst_executed_op_global_red.sum
inst_executedsm__inst_executed.sum
inst_executed_fma_pipe_s0smsp__inst_executed_pipe_fma.sum
inst_executed_fma_pipe_s1smsp__inst_executed_pipe_fma.sum
inst_executed_fma_pipe_s2smsp__inst_executed_pipe_fma.sum
inst_executed_fma_pipe_s3smsp__inst_executed_pipe_fma.sum
inst_executed_fp16_pipe_s0smsp__inst_executed_pipe_fp16.sum
inst_executed_fp16_pipe_s1smsp__inst_executed_pipe_fp16.sum
inst_executed_fp16_pipe_s2smsp__inst_executed_pipe_fp16.sum
inst_executed_fp16_pipe_s3smsp__inst_executed_pipe_fp16.sum
inst_executed_fp64_pipe_s0smsp__inst_executed_pipe_fp64.sum
inst_executed_fp64_pipe_s1smsp__inst_executed_pipe_fp64.sum
inst_executed_fp64_pipe_s2smsp__inst_executed_pipe_fp64.sum
inst_executed_fp64_pipe_s3smsp__inst_executed_pipe_fp64.sum
inst_issued1sm__inst_issued.sum
l2_subp0_read_sector_misseslts__t_sectors_op_read_lookup_miss.sum
l2_subp1_read_sector_misseslts__t_sectors_op_read_lookup_miss.sum
l2_subp0_read_sysmem_sector_querieslts__t_sectors_aperture_sysmem_op_read.sum
l2_subp1_read_sysmem_sector_querieslts__t_sectors_aperture_sysmem_op_read.sum
l2_subp0_read_tex_hit_sectorslts__t_sectors_srcunit_tex_op_read_lookup_hit.sum
l2_subp1_read_tex_hit_sectorslts__t_sectors_srcunit_tex_op_read_lookup_hit.sum
l2_subp0_read_tex_sector_querieslts__t_sectors_srcunit_tex_op_read.sum
l2_subp1_read_tex_sector_querieslts__t_sectors_srcunit_tex_op_read.sum
l2_subp0_total_read_sector_querieslts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum
l2_subp1_total_read_sector_querieslts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum
l2_subp0_total_write_sector_querieslts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum
l2_subp1_total_write_sector_querieslts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum
l2_subp0_write_sector_misseslts__t_sectors_op_write_lookup_miss.sum
l2_subp1_write_sector_misseslts__t_sectors_op_write_lookup_miss.sum
l2_subp0_write_sysmem_sector_querieslts__t_sectors_aperture_sysmem_op_write.sum
l2_subp1_write_sysmem_sector_querieslts__t_sectors_aperture_sysmem_op_write.sum
l2_subp0_write_tex_hit_sectorslts__t_sectors_srcunit_tex_op_write_lookup_hit.sum
l2_subp1_write_tex_hit_sectorslts__t_sectors_srcunit_tex_op_write_lookup_hit.sum
l2_subp0_write_tex_sector_querieslts__t_sectors_srcunit_tex_op_write.sum
l2_subp1_write_tex_sector_querieslts__t_sectors_srcunit_tex_op_write.sum
not_predicated_off_thread_inst_executedsmsp__thread_inst_executed_pred_on.sum
pcie_rx_active_pulse 
pcie_tx_active_pulse 
prof_trigger_00 
prof_trigger_01 
prof_trigger_02 
prof_trigger_03 
prof_trigger_04 
prof_trigger_05 
prof_trigger_06 
prof_trigger_07 
inst_issued0smsp__issue_inst0.sum
sm_cta_launchedsm__ctas_launched.sum
shared_loadsmsp__inst_executed_op_shared_ld.sum
shared_storesmsp__inst_executed_op_shared_st.sum
generic_loadsmsp__inst_executed_op_generic_ld.sum
generic_storesmsp__inst_executed_op_generic_st.sum
global_loadsmsp__inst_executed_op_global_ld.sum
global_storesmsp__inst_executed_op_global_st.sum
local_loadsmsp__inst_executed_op_local_ld.sum
local_storesmsp__inst_executed_op_local_st.sum
shared_atomsmsp__inst_executed_op_shared_atom.sum
shared_atom_cassmsp__inst_executed_op_shared_atom_dot_cas.sum
shared_ld_bank_conflictl1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum
shared_st_bank_conflictl1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum
shared_ld_transactionsl1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum
shared_st_transactionsl1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum
tensor_pipe_active_cycles_s0smsp__pipe_tensor_cycles_active.sum
tensor_pipe_active_cycles_s1smsp__pipe_tensor_cycles_active.sum
tensor_pipe_active_cycles_s2smsp__pipe_tensor_cycles_active.sum
tensor_pipe_active_cycles_s3smsp__pipe_tensor_cycles_active.sum
thread_inst_executedsmsp__thread_inst_executed.sum
warps_launchedsmsp__warps_launched.sum
+
+
+
+
+
+

2.9. Migration to the Profiling API

+
+
+
The CUPTI event APIs from the header cupti_events.h + and metric APIs from the header cupti_metrics.h will + be deprecated in a future CUDA release. The NVIDIA Volta platform is the last architecture + on which these APIs are supported. + These are being replaced by the Profiling API in the header + cupti_profiler_target.h and Perfworks Metric API + in the headers nvperf_host.h and nvperf_target.h. These provide low and deterministic + profiling overhead on the target system. These APIs also have other significant enhancements such as: + + +
+

GPU architectures supported by different CUPTI APIs are listed at the + table. + Both the event and metric APIs and the profiling APIs are supported for Volta. + This is to enable transition of code to the profiling APIs. But one cannot mix + the usage of the event and metric APIs and the profiling APIs. +

+

The Profiling APIs are supported on all CUDA supported platforms except Android.

+

It is important to note that for support of future GPU architectures and feature improvements + (such as performance overhead reduction and additional performance metrics), users should use + the Profiling APIs. There are a few features which are not supported by the Profiling APIs; + refer to the section for differences from event and metric APIs. +

+

However note that there are no changes to the CUPTI Activity and Callback APIs and these + will continue to be supported for the current and future GPU architectures. +

+
+
+
+
+

2.10. CUPTI PC Sampling API

+
+
+

+ A new set of CUPTI APIs for PC sampling data collection are provided in the header file cupti_pcsampling.h + which support continuous mode data collection without serializing kernel execution and have a lower runtime overhead. + Along with these a utility library is provided in the header file cupti_pcsampling_util.h which has APIs for GPU + assembly to CUDA-C source correlation and for reading and writing the PC sampling data from/to files. + +

+

+ The PC Sampling APIs are supported on all CUDA supported platforms. These are supported on Volta and later GPU + architectures, i.e. devices with compute capability 7.0 and higher. + +

+
+ Overview of Features: + +
    +
  • Two sampling modes – Continuous (concurrent kernels) or Serialized (one kernel at a time)​.
  • +
  • Option to select stall reasons to collect.​
  • +
  • Ability to collect GPU PC sampling data for entire application duration or for specific CPU code + ranges (defined by start and stop APIs).​ +
  • +
  • API to flush GPU PC sampling data.​
  • +
  • APIs to support Offline and Runtime correlation of GPU PC samples to CUDA C source lines and GPU + assembly instructions​. +
  • +
+
+

+ Samples are provided to demonstrate how to write the injection library to collect the PC sampling information, + and how to parse the generated files using the utility APIs to print the stall reasons counter values and associate + those with the GPU assembly instructions and CUDA-C source code. Refer to the samples + pc_sampling_continuous, + pc_sampling_utility + and pc_sampling_start_stop. + +

+
Note: + PC Sampling APIs from the header cupti_activity.h are referred to as PC Sampling Activity APIs + and APIs from the header cupti_pcsampling.h are referred to as PC Sampling APIs. + +
+
+
+
+

2.10.1. Configuration Attributes

+
+

+ The following table lists the PC sampling configuration attributes which can be set using the cuptiPCSamplingSetConfigurationAttribute() API. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 7. PC Sampling Configuration Attributes
Configuration AttributeDescriptionDefault ValueComparison of PC Sampling APIs with CUPTI PC Sampling Activity APIsGuideline to Tune Configuration Option
Collection modePC Sampling collection mode - Continuous or Kernel SerializedContinuous +

Continuous mode is new.

+

Kernel Serialized mode is equivalent to the kernel level functionality provided by the CUPTI PC sampling + Activity APIs. +

+
 
Sampling period +

Sampling period for PC Sampling. Valid values for the sampling period are between 5 and 31, both inclusive. + This will set the sampling period to (2^samplingPeriod) cycles. +

+

e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31

+
CUPTI defined value is based on number of SMs +

Dropped current support for 5 levels(MIN, LOW, MID, HIGH, MAX) for sampling period.

+

The new "sampling period" is equivalent to the "samplingPeriod2" field in CUpti_ActivityPCSamplingConfig.

+
Low sampling period means a high sampling frequency which can result in dropping of samples. + Very high sampling period can cause low sampling frequency and no sample generation. +
Stall reason +

Stall reasons to collect

+

Input is a pointer to an array of the stall reason indexes to collect.

+
All stall reasons will be collectedWith the CUPTI PC sampling Activity APIs there is no option to select which stall reasons to collect. + Also the list of supported stall reasons has changed. +  
Scratch buffer size +

Size of SW buffer for raw PC counter data downloaded from HW buffer.

+

Approximately it takes 16 Bytes (and some fixed size memory) to accommodate one PC with one stall reason

+

e.g. 1 PC with 1 stall reason = 32 Bytes

+

1 PC with 2 stall reason = 48 Bytes

+

1 PC with 4 stall reason = 96 Bytes

+
+

1 MB

+

(which can accommodate approximately 5500 PCs with all stall reasons)

+
NewClients can choose scratch buffer size as per memory budget. Very small scratch buffer size can cause + runtime overhead as more iterations would be required to accommodate and process more PC samples +
Hardware buffer size +

Size of HW buffer in bytes.

+

If the sampling period is too low, the HW buffer can overflow and drop PC data

+
512 MBNewDevice accessible buffer for samples. A small hardware buffer size with low sampling periods can cause + overflow and dropping of PC data. A large hardware buffer size can impact application execution due to a lower + amount of device memory being available +
Enable start/stop control +

Control over PC Sampling data collection range.

+

1 - Allows user to start and stop PC Sampling using APIs

+
0 (disabled)New 
+
+
+
+
+

2.10.2. Stall Reasons Mapping Table

+
+

+ The table below lists the stall reasons mapping from PC Sampling Activity APIs to PC Sampling APIs. + Note: Stall reasons with suffix _not_issued represent latency samples. These samples indicate that + no instruction was issued in that cycle from the warp scheduler from where the warp was sampled. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 8. Stall Reasons Mapping Table from PC Sampling Activity APIs to PC Sampling APIs
+

PC Sampling Activity API Stall Reasons

+

(common prefix: CUPTI_ACTIVITY_PC_SAMPLING_STALL_)

+
+

PC Sampling API Stall Reasons

+

(common prefix: smsp__pcsamp_warps_issue_stalled_)

+
NONE +

selected

+

selected_not_issued

+
INST_FETCH +

branch_resolving

+

branch_resolving_not_issued

+

no_instructions

+

no_instructions_not_issued

+
EXEC_DEPENDENCY +

short_scoreboard

+

short_scoreboard_not_issued

+

wait

+

wait_not_issued

+
MEMORY_DEPENDENCY +

long_scoreboard

+

long_scoreboard_not_issued

+
TEXTURE +

tex_throttle

+

tex_throttle_not_issued

+
SYNC +

barrier

+

barrier_not_issued

+

membar

+

membar_not_issued

+
CONSTANT_MEMORY_DEPENDENCY +

imc_miss

+

imc_miss_not_issued

+
PIPE_BUSY +

mio_throttle

+

mio_throttle_not_issued

+

math_pipe_throttle

+

math_pipe_throttle_not_issued

+
MEMORY_THROTTLE +

drain

+

drain_not_issued

+

lg_throttle

+

lg_throttle_not_issued

+
NOT_SELECTED +

not_selected

+

not_selected_not_issued

+
OTHER +

misc

+

misc_not_issued

+

dispatch_stall

+

dispatch_stall_not_issued

+
SLEEPING +

sleeping

+

sleeping_not_issued

+
+
+
+
+
+

2.10.3. Data Structure Mapping Table

+
+

+ The table below lists the data structure mapping from PC Sampling Activity APIs to PC Sampling APIs. + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 9. Data structure Mapping Table from PC Sampling Activity APIs to PC Sampling APIs
PC Sampling Activity API structuresPC Sampling API structures
CUpti_ActivityPCSamplingConfigCUpti_PCSamplingConfigurationInfo
CUpti_ActivityPCSamplingStallReason +

CUpti_PCSamplingStallReason

+

Refer to the Stall Reasons Mapping Table

+
CUpti_ActivityPCSampling3CUpti_PCSamplingPCData
CUpti_ActivityPCSamplingRecordInfoCUpti_PCSamplingData
+
+
+
+
+

2.10.4. Data flushing

+
+
+
+ CUPTI clients can periodically flush GPU PC sampling data using the API cuptiPCSamplingGetData(). + + Besides periodic flushing of GPU PC sampling data, CUPTI clients need to also flush the GPU PC sampling data + at the following points to maintain the uniqueness of PCs: + +
    +
  • For continuous collection mode CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS - after each module load-unload-load sequence.
  • +
  • For serialized collection mode CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED - after completion of each kernel.
  • +
  • For range profiling using the configuration option CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL + - at the end of the range i.e. after cuptiPCSamplingStop() API. +
  • +
+ + + If application is profiled in the continuous collection mode with range profiling disabled, and there is + no module unload, CUPTI clients can collect data in two ways: + +
    +
  • By using cuptiPCSamplingGetData() API periodically. +
  • +
  • By using cuptiPCSamplingDisable() on application exit and reading GPU PC sampling data from + sampling data buffer passed during configuration. +
  • +
+
Note: In case the cuptiPCSamplingGetData() API is not called periodically, the sampling data buffer + passed during configuration should be big enough to hold the data for all the PCs. + +
+
Note: Field remainingNumPcs of the struct CUpti_PCSamplingData helps in identifying the + number of PC records available with CUPTI. The user can adjust the periodic flush interval based on it. + Further, the user needs to ensure that all remaining records can be accommodated in the sampling data buffer + passed during configuration before disabling the PC sampling. + +
+
+
+
+
+
+

2.10.5. SASS Source Correlation

+
+
+ Building SASS source correlation for a PC can be split into two parts: + +
    +
  • Correlation of a PC to a SASS instruction - PC to SASS correlation is done during + PC sampling at run time and the SASS data is available in the PC record. Fields cubinCrc, + pcOffset and functionName in the PC record help in correlation of a PC + with a SASS instruction. + You can extract cubins from the application executable or library using the cuobjdump + utility by executing the command cuobjdump -xelf all exe/lib. The cuobjdump + utility version should match with the CUDA Toolkit version used to build the CUDA application + executable or library files. You can find the cubinCrc for extracted cubins using the + cuptiGetCubinCrc() API. With the help of cubinCrc you can find out the cubin to + which a PC belongs. The cubin can be disassembled using the nvdisasm utility that + comes with the CUDA toolkit. + +
  • +
  • Correlation of a SASS instruction to a CUDA source line - Correlation of GPU PC + samples to CUDA C source lines can be done offline as well as at runtime with the help of + the cuptiGetSassToSourceCorrelation() API. + +
  • +
+
+
JIT compiled cubins - In case of JIT compiled cubins, it is not possible to extract the + cubin from the executable or library. For this case one can subscribe to one of the + CUPTI_CBID_RESOURCE_MODULE_LOADED or CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING + or CUPTI_CBID_RESOURCE_MODULE_PROFILED callbacks. It returns a CUpti_ModuleResourceData + structure having the CUDA binary. This binary can be stored in a file and can be used for offline CUDA C + source correlation. + +
+
+
+
+

2.10.6. API Usage

+
+
+
Here is a pseudo code which shows how to collect the PC sampling data for specific CPU code ranges: +
+    
+// Pseudo code: collect PC sampling data only for selected CPU code ranges.
+// Flow: configure collection mode -> select stall reasons -> opt in to
+// start/stop control -> enable -> (start / launch kernels / stop / flush)* -> disable.
+// API arguments are elided where they are not relevant to the flow.
+void Collection()
+{
+    // Select collection mode
+    CUpti_PCSamplingConfigurationInfoParams pcSamplingConfigurationInfoParams = {};
+
+    CUpti_PCSamplingConfigurationInfo collectionMode = {};
+    // Every attribute must carry its type tag (as enableStartStop does below);
+    // the value-initialized struct alone does not identify this as the collection mode.
+    collectionMode.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE;
+    collectionMode.attributeData.collectionModeData.collectionMode = CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS;
+
+    pcSamplingConfigurationInfoParams.numAttributes = 1;
+    pcSamplingConfigurationInfoParams.pPCSamplingConfigurationInfo = &collectionMode;
+    cuptiPCSamplingSetConfigurationAttribute(&pcSamplingConfigurationInfoParams);
+
+    // Select stall reasons to collect
+    {
+        // Get number of supported stall reasons
+        cuptiPCSamplingGetNumStallReasons();
+        // Get supported stall reason names and corresponding indexes
+        cuptiPCSamplingGetStallReasons();
+        // Set selected stall reasons
+        cuptiPCSamplingSetConfigurationAttribute();
+    }
+
+    // Select code range using start/stop APIs
+    // Opt-in for start and stop PC Sampling using APIs cuptiPCSamplingStart and cuptiPCSamplingStop
+    CUpti_PCSamplingConfigurationInfo enableStartStop = {};
+    enableStartStop.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL;
+    enableStartStop.attributeData.enableStartStopControlData.enableStartStopControl = true;
+
+    pcSamplingConfigurationInfoParams.numAttributes = 1;
+    pcSamplingConfigurationInfoParams.pPCSamplingConfigurationInfo = &enableStartStop;
+    cuptiPCSamplingSetConfigurationAttribute(&pcSamplingConfigurationInfoParams);
+
+    // Enable PC Sampling
+    cuptiPCSamplingEnable();
+
+    kernelA <<<blocks, threads, 0, s0>>>(...);                  // KernelA is not sampled
+
+    // Start PC sampling collection
+    cuptiPCSamplingStart();
+    {
+        // KernelB and KernelC might run concurrently since 'continuous' sampling collection mode is selected
+        kernelB <<<blocks, threads, 0, s0>>>(...);              // KernelB is sampled
+        kernelC <<<blocks, threads, 0, s1>>>(...);              // KernelC is sampled
+    }
+    // Stop PC sampling collection
+    cuptiPCSamplingStop();
+    // Flush PC sampling data at the end of the range
+    cuptiPCSamplingGetData();
+
+    kernelD <<<blocks, threads, 0, s0>>>(...);                  // KernelD is not sampled
+
+    // Start PC sampling collection
+    cuptiPCSamplingStart();
+    {
+        kernelE <<<blocks, threads, 0, s0>>>(...);              // KernelE is sampled
+    }
+    // Stop PC sampling collection
+    cuptiPCSamplingStop();
+    // Flush PC sampling data at the end of the range
+    cuptiPCSamplingGetData();
+
+    // Disable PC Sampling
+    cuptiPCSamplingDisable();
+}
+        
+
+
+
+
+

2.10.7. Limitations

+
+
+
Known limitations and issues: + +
    +
  • In the serial mode, PC Sampling APIs do not provide information for correlation of PC sampling + data for a kernel to the CUDA kernel launch API. This is supported by the PC Sampling activity APIs. + For continuous mode, this cannot be supported due to hardware limitations. +
  • +
  • PC Sampling APIs don't support simultaneous sampling of multiple CUDA contexts on a GPU. + However, simultaneous sampling of single CUDA context per GPU is supported. + Before enabling and configuring the PC sampling on a different CUDA context on the same GPU, PC sampling + needs to be disabled on the other context. +
  • +
+
+
+
+
+
+
+

2.11. CUPTI Checkpoint API

+
+
+

+ Starting with CUDA 11.5, CUPTI ships with a new library to assist tool + developers who wish to replay kernels under direct control, such as tools using the Profiling API + User Replay mode. This new Checkpoint library provides support for automatically saving + and restoring device state for many common uses. + +

+

+ A device checkpoint is a managed copy of device functional state - including values in memory, + along with some (but not all) other user visible state of the device. When a checkpoint is saved, + this state is saved to internal buffers, preferentially using free device, then host, and finally + filesystem space to save the data. The user tool maintains a handle to a checkpoint, and is able to + restore the checkpoint with a single call, restoring the state so a kernel may be re-executed and + expect to have the same device state as when the checkpoint was saved. + +

+

+ Once saved, a checkpoint may be restored any time including after multiple kernels have been launched, + though currently there are limitations on which user calls (CUDA or driver API calls) have been + validated to work between a Save and Restore. It currently is known safe to + launch multiple kernels on a context and to do memcpy calls before restoring a checkpoint. Future + versions of CUPTI will extend this to support additional API calls between a Save and + Restore. + +

+

+ Checkpoints may be saved during injected kernel launch callbacks or directly coded into a target application. + +

+

+ Certain APIs are known to not work with the version of the Checkpoint API shipped with + CUPTI 11.5, including Stream Capture mode. + +

+
+
+
+

2.11.1. Usage

+
+
+

+ There is one header for the library, cupti_checkpoint.h, which needs to be included, and libcheckpoint + needs to be linked in to the application or injection library. Though + the checkpoint library doesn't depend on cupti, the error codes returned by the API are shared with cupti, so linking + libcupti in is needed in order to translate the return codes to string representations. + +

+

+ The Checkpoint API follows a similar design to other CUPTI APIs. + API behavior is controlled through a structure, CUpti_Checkpoint, which is initialized + by a tool or application, then passed to cuptiCheckpointSave. If the call is successful, + the structure saves a handle to a checkpoint. At this point, the application may make a series of calls + which modify device state (kernels which update memory, memcopies, etc), and when the device state should + be restored, the tool can use the same structure in calls to cuptiCheckpointRestore, and + finally a call to cuptiCheckpointFree to release the resources used by the checkpoint object. + +

+

+ Multiple checkpoints may be saved at the same time. If multiple checkpoints exist, they operate entirely + independently - each checkpoint consumes the full resources needed to restore the device state at the point + it was saved. Order of operations between multiple checkpoints is not enforced by the API - while a common + use for multiple checkpoints may be a nested pattern, it is also possible to interleave checkpoint operations. + +

+

+ Between a cuptiCheckpointSave and cuptiCheckpointRestore, any number of + standard kernel launches (or equivalent API calls such as cuLaunchKernel) or memcpy calls + may be made. Additionally, any host (cpu) side calls may be made that do not affect device state. It is + possible that other CUDA or driver API calls may be made, but have not been validated with the 11.5 release. + +

+

+ Several options exist in the CUpti_Checkpoint structure. They must be set prior to the + initial cuptiCheckpointSave using that structure. Any further changes to the structure + are ignored until after a call to cuptiCheckpointFree, at which point the structure + can be re-configured and re-used. + +

+
+ Important per-checkpoint options: + +
    +
  • structSize - must be set to the value of CUpti_Checkpoint_STRUCT_SIZE
  • +
  • ctx - if NULL, the checkpoint will be of the default CUDA context; otherwise, specifies which context to checkpoint +
  • +
  • reserveDeviceMB - Restrict a checkpoint save from using all device memory, so that at least this much device memory (in MB) is left free +
  • +
  • reserveHostMB - Restrict a checkpoint save from using all host memory, so that at least this much host memory (in MB) is left free +
  • +
  • allowOverwrite - It is normally an error to call Save using an existing checkpoint handle (one which has not + been Freed). When set, this option allows the Save operation to be called multiple times on a handle. Note that when using + this option, + the CUpti_Checkpoint options are not re-read on any subsequent Save. To read new options, the handle must + be passed to cuptiCheckpointFree prior to the cuptiCheckpointSave call. + +
  • +
  • optimizations - Bitmask of options for checkpoint behavior + +
      +
    • CUPTI_CHECKPOINT_OPT_TRANSFER - Normally when restoring a checkpoint, all existing device memory at the time + of the save is restored. This optimization adds a test to see whether a block of memory has changed before restoring it and + caches + the results for subsequent calls to Restore. Use of this option requires that all Restore calls be done at the same + point in an application for a given checkpoint. As the optimization may be computationally expensive, it is most + useful when there is a significant amount of data that can be skipped and there will be several calls to Restore + the checkpoint. + +
    • +
    +
  • +
+
+
+
+
+
+

2.11.2. Restrictions

+
+
+

+ Checkpoint API calls may not be made during a stream capture. They also may not be inserted into a graph. + Beyond kernel launches (cuLaunchKernel, standard kernel<<<>>> launches, etc) and memcpy + calls, the remaining CUDA and driver API calls have not been validated within a + CheckpointSave and Restore region. + Any other CUDA or driver API calls (example - device malloc or free) may work, or may cause undetermined behavior. + Additional APIs will be validated to work with the Checkpoint API + in future releases. + +

+

+ The Checkpoint API does not have visibility into which API calls have been made between + cuptiCheckpointSave and cuptiCheckpointRestore calls, + and may not be able to correctly detect error cases if unsupported calls have been made. + In this case it is possible that device state may only be partially restored by + cuptiCheckpointRestore, which may cause functionally incorrect behavior + in subsequent device calls. + +

+

+ The Checkpoint API only restores functionally visible device state, not performance + critical state. Some performance characteristics, such as state of the caches, will + not be saved by a checkpoint, and saving or restoring a checkpoint + may change the occupancy and alter performance for subsequent device calls. + +

+

+ The Checkpoint API makes no attempt to restore host (non-device) state, beyond freeing + the resources it internally uses during a call to cuptiCheckpointFree. + +

+

+ The Checkpoint API by default uses device memory, host memory, and finally the filesystem to + back up the device state. It is possible that addition of a cuptiCheckpointSave + causes a later device allocation to fail due to the increased device memory usage. (Similarly, + host memory is also used, and may be affected by a checkpoint). To allow the user to guarantee + a certain amount of device or host memory remains available for later use, reserveDeviceMB + and reserveHostMB fields in the CUpti_Checkpoint struct + may be set. Use of these fields will guarantee that the device or host memory will leave + that much memory free during a cuptiCheckpointSave call, but may cause + the Checkpoint API call performance to degrade due to increased use of slower storage spaces. + +

+
+
+
+
+

2.11.3. Examples

+
+
+
+ The Checkpoint API does not require any other CUPTI calls. A simple use case + could be to compare the output of three different implementations of a kernel. Pseudocode for this could look like: +
+CUpti_Checkpoint cp = { CUpti_Checkpoint_STRUCT_SIZE };
+
+int kernel = 0;
+do
+{
+  if (kernel == 0)
+    cuptiCheckpointSave(&cp);
+  else
+    cuptiCheckpointRestore(&cp);
+
+  if (kernel == 0)
+    kernel_1<<<>>>(...);
+  else if (kernel == 1)
+    kernel_2<<<>>>(...);
+  else if (kernel == 2)
+    kernel_3<<<>>>(...);
+} while (kernel++ < 3);
+
+cuptiCheckpointFree(&cp);
+
+ In this example, even if any of the kernels modify their own input data, the subsequent passes through the loop will still + run correctly - the modified input data would be restored by each call to cuptiCheckpointRestore before the + next kernel runs. + This is particularly useful when a programmer does not know the exact state of the device prior to a kernel call - the + Checkpoint API ensures that all needed data is saved and restored, which would not otherwise be practical or perhaps even + possible + in some complex cases. + +
+
+ Another possible use case could be for fuzzing - randomly modifying input to a kernel, and ensuring it performs as expected. + Instead of manually restoring device state to a known good point, the Checkpoint API can restore a known good state, and the + fuzzer + can modify only what is needed. +
+CUpti_Checkpoint cp = { CUpti_Checkpoint_STRUCT_SIZE };
+
+int i = 0;
+do
+{
+  if (i == 0)
+    cuptiCheckpointSave(&cp);
+  else
+    cuptiCheckpointRestore(&cp);
+
+  setup_test<<<>>>(i, ...);
+
+  kernel<<<>>>(...);
+
+  validate_result<<<>>>(i, ...);
+} while (i++ < num_tests);
+
+cuptiCheckpointFree(&cp);
+
+
+ Finally, the Checkpoint API is very useful for the User Replay mode of the CUPTI Profiling API. The User Replay + mode can be very desirable as it allows kernels to run concurrently, which Kernel Replay mode does not, and only + replays parts of the application which are within a performance region, unlike Application Replay mode. However, in this mode, + a kernel potentially needs to be launched multiple times in order to gather all requested metrics. This is complicated + when the kernel may modify some of its own input data, and without the Checkpoint API, would require the tool developer + to handle restoring any modified input data manually. It is difficult for a tool to automatically know whether any + data needs to be restored before each iteration, or even what the existing state of the device is. Using the Checkpoint API, + the tool can guarantee that input data will be restored each pass. +
+CUpti_Checkpoint cp = { CUpti_Checkpoint_STRUCT_SIZE };
+
+// Pseudocode - assume all Profiling API structures are already initialized correctly
+cuptiProfilerBeginSession(&beginSessionParams);
+cuptiProfilerSetConfig(&setConfigParams);
+int numPasses = 0;
+bool lastPass = false;
+do
+{
+  if (numPasses == 0)
+    cuptiCheckpointSave(&cp);
+  else
+    cuptiCheckpointRestore(&cp);
+
+  cuptiProfilerBeginPass(&beginPassParams);
+  cuptiProfilerEnableProfiling(&enableProfilingParams);
+  cuptiProfilerPushRange(&pushRangeParams);
+
+  // Kernel launch on N separate streams - will be profiled while running concurrently
+  kernel<<<..., stream0>>>(...);
+  kernel<<<..., stream1>>>(...);
+  ...
+  kernel<<<..., streamN>>>(...);
+
+  cudaStreamSynchronize(stream0);
+  cudaStreamSynchronize(stream1);
+  ...
+  cudaStreamSynchronize(streamN);
+
+  cuptiProfilerPopRange(&popRangeParams);
+  cuptiProfilerDisableProfiling(&disableProfilingParams);
+  lastPass = cuptiProfilerEndPass(&endPassParams);
+} while (lastPass == false);
+cuptiProfilerFlushCounterData(&flushCounterDataParams);
+cuptiProfilerUnsetConfig(&unsetConfigParams);
+cuptiProfilerEndSession(&endSessionParams);
+
+ In this example, the Profiler range will span all concurrently running kernels, which may modify their own input data - each + pass through the loop will restore the initial values. +
+
+
+
+
+
+

2.12. CUPTI overhead

+
+
+

CUPTI incurs overhead when used for tracing or profiling of the CUDA application. + Overhead can vary significantly from one application to another. It largely depends + on the density of the CUDA activities in the application; the fewer the CUDA activities, + the lower the CUPTI overhead. + In general, the overhead of tracing (i.e. activity APIs) is much lower than that of profiling + (i.e. event and metric APIs). + +

+
+
+
+

2.12.1. Tracing Overhead

+
+
+

One of the goals of the tracing APIs is to provide a non-invasive collection of the + timing information of the CUDA activities. Tracing is a low-overhead mechanism for + collecting fine-grained runtime information. + +

+
+
+
+
2.12.1.1. Execution overhead
+
+
+
Factors affecting the execution overhead under tracing are: + +
    +
  • Serial kernel trace enabled using the activity kind CUPTI_ACTIVITY_KIND_KERNEL can significantly + change the overall performance characteristics of the application because all kernel executions are serialized + on the GPU. For applications which use only a single CUDA stream and therefore cannot have concurrent kernel + execution, this mode can be useful as it usually (not always) incurs less profiling overhead compared to the + concurrent kernel mode. + +
  • +
  • Concurrent kernel trace enabled using the activity kind CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL + doesn't affect the concurrency of the kernels in the application. + In this mode, CUPTI instruments the kernel code to collect the timing information. A single instrumentation + code is generated at the time of loading the CUDA module and applied to each kernel during the kernel execution. + Instrumentation code generation overhead is attributed as CUPTI_ACTIVITY_OVERHEAD_CUPTI_INSTRUMENTATION + in the activity record CUpti_ActivityOverhead. +
  • +
  • Due to the code instrumentation, concurrent kernel mode can add significant runtime overhead if used + on kernels that execute a large number of blocks and that have short execution durations. +
  • +
+
+
+
+
+
+
2.12.1.2. Memory overhead
+
+
+
CUPTI allocates device and pinned system memory for storing the tracing information: + +
    +
  • Static memory allocation: CUPTI allocates 3 buffers of 3 MB each in the pinned system memory for each + CUDA context by default during the context creation phase. This is used for storing the concurrent kernel, serial kernel, + memcopy and memset tracing information and these buffers are sufficient for storing information for about 300K such + activities. The number of buffers is controlled using the attribute + CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE and the size of the buffer is determined by the attribute + CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE. User can change the buffer size at any time during the profiling + session, but this setting takes effect only for new buffer allocations. It is recommended to adjust the buffer size + before the creation of any CUDA context to make sure that all the pre-allocated buffers are of the adjusted size. +
  • +
  • Dynamic memory allocation: Once profiling buffers to store the tracing information are exhausted, + CUPTI allocates another buffer of the same size. + Note that memory footprint will not always scale with the kernel, memcopy, memset count because CUPTI reuses the + buffer after processing all the records in the buffer. For applications with a high density of these activities + CUPTI may allocate more buffers. +
  • +
+ + All of the CUPTI allocated memory associated with a context is freed when the context is destroyed. + Memory allocation overhead is attributed as CUPTI_ACTIVITY_OVERHEAD_CUPTI_RESOURCE in the activity record + CUpti_ActivityOverhead. If there are no CUDA contexts created then CUPTI will not allocate corresponding + buffers. + +
+
CUPTI allocates memory to store unique kernel names, NVTX ranges, CUDA module cubin: + +
    +
  • Kernel trace: For kernel tracing enabled using the activity kind CUPTI_ACTIVITY_KIND_KERNEL or + CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL CUPTI allocates memory to store the kernel name in the records. + It is recommended to not free the memory allocated for the kernel name in the kernel activity record as the kernel name + memory space might be common across all kernel records having the same kernel name. +
  • +
  • NVTX ranges: For NVTX enabled using the activity kind CUPTI_ACTIVITY_KIND_MARKER + CUPTI allocates memory to store the range name in the records. It is recommended to not free the memory allocated for + the NVTX range name in the marker activity record as the NVTX range name memory space will be common across all NVTX range + records having the same name. +
  • +
  • CUDA module cubin: CUPTI stores the data related to the modules loaded by the application i.e. the cubin image. + All of the CUPTI allocated memory associated with the cubin image of the module is freed when the module is unloaded. +
  • +
+
+
+
+
+
+
+

2.12.2. Profiling Overhead

+
+
+

Events and metrics collection using CUPTI incurs runtime overhead. This overhead depends on the number + and type of events and metrics selected. Since each metric is computed from one or more events, metric + overhead depends on the number and type of underlying events. The overhead includes time spent in configuration + of hardware events and reading of hardware event values. + +

+
Factors affecting the execution overhead under profiling are: + +
    +
  • Overhead is less for hardware provided events and metrics. + +
      +
    • For event and metric APIs, events which are collected using the collection method + CUPTI_EVENT_COLLECTION_METHOD_PM or CUPTI_EVENT_COLLECTION_METHOD_SM fall in this category. +
    • +
    • For Profiling APIs, metrics which don't have string "sass" in the name fall in this category.
    • +
    +
  • +
  • Software instrumented events and metrics are expensive as CUPTI needs to instrument the kernel to collect + these. Further these events and metrics cannot be combined with any other event or metric in the same pass as + otherwise instrumented code will also contribute to the event value. + +
      +
    • For event and metric APIs, events which are collected using the collection method CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED + fall in this category. +
    • +
    • For Profiling APIs, metrics which have string "sass" in the name fall in this category.
    • +
    +
  • +
  • In the serial mode, profiling may significantly change the overall performance characteristics + of the application because all kernel executions are serialized on the GPU. This is done to enable tight + event or metric collection around each kernel. + +
      +
    • For event and metric APIs, the collection mode CUPTI_EVENT_COLLECTION_MODE_KERNEL, + serializes all kernel executions on the GPU that occur between the APIs cuptiEventGroupEnable + and cuptiEventGroupDisable. On the other hand, kernel concurrency can be maintained by using + the collection mode CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS and restricting profiling to + events and metrics that can be collected in a single pass. +
    • +
    • For Profiling APIs, auto range mode serializes all kernel executions on the GPU. On the other hand, + kernel concurrency can be maintained by using the user range mode. +
    • +
    +
  • +
  • When all the requested events or metrics cannot be collected in the single pass due to + hardware or software limitations, one needs to replay the exact same set of GPU workloads multiple + times. This can be achieved at the kernel granularity by replaying kernel multiple times or + by launching the entire application multiple times. + CUPTI provides support for kernel replay only. Application replay can be done by the + CUPTI client. +
  • +
  • When kernel replay is used the overhead to save and restore kernel state for each replay pass + depends on the amount of device memory used by the kernel. + Application replay is expected to perform better than kernel replay for the case when the size of + device memory used by the kernel is high. +
  • +
+
+
+
+
+
+
+

2.13. Samples

+
+
+

The CUPTI installation includes several samples that demonstrate the + use of the CUPTI APIs. These samples can be referred to for the usage of different APIs + supported by CUPTI. The samples are: +

+

Activity API

+
+
+ activity_trace_async +
+
This sample shows how to collect a trace of CPU and + GPU activity using the new asynchronous activity buffer + APIs. +
+
callback_timestamp
+
This sample shows how to use the callback + API to record a trace of API start and stop times. +
+
+ cuda_graphs_trace +
+
This sample shows how to collect the trace of CUDA graphs and correlate + the graph node launch to the node creation API using CUPTI callbacks. + +
+
cuda_memory_trace
+
This sample shows how to collect the trace of CUDA memory operations. + The sample also traces CUDA memory operations done via default memory pool. +
+
+ cupti_correlation +
+
This sample shows how to do the correlation between CUDA APIs and corresponding + GPU activities. +
+
+ cupti_external_correlation +
+
This sample shows how to do the correlation of CUDA API activity + records with external APIs. +
+
+ cupti_finalize +
+
This sample shows how to use API cuptiFinalize() + to dynamically detach and attach CUPTI. +
+
+ cupti_nvtx +
+
This sample shows how to receive NVTX callbacks and + collect NVTX records in CUPTI. +
+
+ cupti_trace_injection +
+
This sample shows how to build an injection library using the CUPTI activity + and callback APIs. It can be used to trace CUDA APIs and GPU activities for + any CUDA application. It does not require the CUDA application to be modified. +
+
+ nvlink_bandwidth +
+
+ This sample shows how to collect NVLink topology and NVLink throughput metrics in + continuous mode. + +
+
+ openacc_trace +
+
This sample shows how to use CUPTI APIs for OpenACC data collection.
+
+ pc_sampling +
+
This sample shows how to collect PC Sampling profiling information + for a kernel using the PC Sampling Activity APIs. +
+
+ sass_source_map +
+
This sample shows how to generate CUpti_ActivityInstructionExecution records + and how to map SASS assembly instructions to CUDA C source. +
+
unified_memory
+
This sample shows how to collect information about page transfers + for unified memory. +
+
+

Event and Metric APIs

+
+
callback_event
+
This sample shows how to use both the callback + and event APIs to record the events that occur during the execution + of a simple kernel. The sample shows the required ordering for + synchronization, and for event group enabling, disabling, and + reading. +
+
callback_metric
+
This sample shows how to use both the + callback and metric APIs to record the metric's events during the + execution of a simple kernel, and then use those events to calculate + the metric value. +
+
cupti_query
+
This sample shows how to query CUDA-enabled + devices for their event domains, events, and metrics. +
+
+ event_multi_gpu +
+
This sample shows how to use the CUPTI event and CUDA APIs to sample + events on a setup with multiple GPUs. The sample shows the required + ordering for synchronization, and for event group enabling, + disabling, and reading. +
+
event_sampling
+
This sample shows how to use the event + APIs to sample events using a separate host thread. +
+
+

Profiling API

+
+
+ extensions +
+
This includes utilities used in some of the samples.
+
+ autorange_profiling +
+
This sample shows how to use profiling APIs to collect metrics in autorange mode.
+
callback_profiling
+
This sample shows how to use callback and profiling APIs to collect the metrics + during the execution of a kernel. It shows how to use different phases of profiling + i.e. enumeration, configuration, collection and evaluation in the appropriate callbacks. + +
+
+ concurrent_profiling +
+
This sample shows how to use the profiling + API to record metrics from concurrent kernels launched in two + different ways - using multiple streams on a single device, and + using multiple threads with multiple devices. +
+
+ cupti_metric_properties +
+
This sample shows how to query various properties of metrics using the Profiling APIs. + The sample shows collection method (hardware or software) and number of passes required to collect a list of metrics. + +
+
+ nested_range_profiling +
+
This sample shows how to profile nested ranges using the Profiling APIs. + +
+
+ profiling_injection +
+
This sample for Linux systems shows how to build an injection library which can + automatically enable CUPTI's Profiling API using Auto Ranges with Kernel Replay mode. It can + attach to an application which was not instrumented using CUPTI and profile any kernel launches. +
+
+ userrange_profiling +
+
This sample shows how to use profiling APIs to collect metrics in user specified + range mode. +
+
+

PC Sampling API

+
+
+ pc_sampling_continuous +
+
This injection sample shows how to collect PC Sampling profiling information using + the PC Sampling APIs. A perl script libpc_sampling_continuous.pl is provided to run + the CUDA application with different PC sampling options. + Use the command './libpc_sampling_continuous.pl --help' to list all the options. + The CUDA application code does not need to be modified. Refer to the README.txt file + shipped with the sample for instructions to build and use the injection library. +
+
+ pc_sampling_start_stop +
+
This sample shows how to collect PC Sampling profiling information + for kernels within a range using the PC Sampling start/stop APIs. +
+
+ pc_sampling_utility +
+
This utility takes the pc sampling data file generated by the pc_sampling_continuous + injection library as input. It prints the stall reason counter values at the GPU assembly + instruction level. It also does GPU assembly to CUDA-C source correlation and shows the + CUDA-C source file name and line number. Refer to the README.txt file shipped with the + sample for instructions to build and run the utility. +
+
+

Checkpoint API

+
+
+ checkpoint_kernels +
+
This sample shows how to use the Checkpoint API to restore + device memory, allowing a kernel to be replayed, even if it + modifies its input data. +
+
+
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/r_overview.html b/doc/Cupti/r_overview.html new file mode 100644 index 0000000000000000000000000000000000000000..213851bb3c27244ddf37f7cae1acbdafb5492ca9 --- /dev/null +++ b/doc/Cupti/r_overview.html @@ -0,0 +1,971 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

Overview

+
+
+

The CUDA Profiling Tools Interface (CUPTI) enables the creation + of profiling and tracing tools that target CUDA applications. CUPTI + provides the following APIs: the Activity API, the Callback API, + the Event API, the Metric API, the Profiling API, + the PC Sampling API and the Checkpoint API. + Using these APIs, you can develop profiling tools that give insight into + the CPU and GPU behavior of CUDA applications. CUPTI is delivered as a + dynamic library on all platforms supported by CUDA. +

+
+
+

In this CUPTI document, Tracing refers to the collection of timestamps and + additional information for CUDA activities such as CUDA APIs, kernel launches and memory copies + during the execution of a CUDA application. Tracing helps in identifying performance issues + for the CUDA code by telling you which parts of a program require the most time. + Tracing information can be collected using the Activity and Callback APIs. +

+
+
+

In this CUPTI document, Profiling refers to the collection of GPU performance metrics + for a single kernel or a set of kernels in isolation. + Profiling might involve multiple replays of the kernel/s or the entire application to + collect GPU performance metrics. + For Volta and earlier GPU architectures, these metrics can be collected using CUPTI + Event and Metric APIs. For Volta and later GPU architectures, + the low overhead CUPTI Profiling and Perfworks Metric APIs replace this functionality, + and a new CUPTI PC Sampling API is supported. +

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1. Description of CUPTI APIs
CUPTI APIFeature Description
ActivityAsynchronously record CUDA activities, e.g. CUDA API, Kernel, + memory copy +
CallbackCUDA event callback mechanism to notify subscriber that a specific + CUDA event executed e.g. "Entering CUDA runtime memory copy" +
EventCollect kernel performance counters for a kernel execution
MetricCollect kernel performance metrics for a kernel execution
ProfilingCollect performance metrics for a range of execution
PC SamplingCollect continuous mode PC Sampling data without serializing + kernel execution +
CheckpointProvides support for automatically saving and restoring the + functional state of the CUDA device +
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/r_special_configurations.html b/doc/Cupti/r_special_configurations.html new file mode 100644 index 0000000000000000000000000000000000000000..b079d51a6d51043e96376abc7e125e2f233110cb --- /dev/null +++ b/doc/Cupti/r_special_configurations.html @@ -0,0 +1,986 @@ + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

4. Special Configurations

+
+

4.1. Multi-Instance GPU (MIG)

+
+
+

Multi-Instance GPU (MIG) is a feature that allows a GPU to be partitioned into multiple CUDA devices. + The partitioning is carried out on two levels: + First, a GPU can be split into one or multiple GPU Instances. Each GPU Instance + claims ownership of one or more streaming multiprocessors (SM), a subset of the overall GPU memory, and + possibly other GPU resources, such as the video encoders/decoders. + Second, each GPU Instance can be further partitioned into one or more Compute Instances. + Each Compute Instance has exclusive ownership of its assigned SMs of the GPU Instance. + However, all Compute Instances within a GPU Instance share the GPU Instance's + memory and memory bandwidth. Every Compute Instance acts and operates as a CUDA device with a unique device ID. + + See the driver release notes as well as the documentation for the nvidia-smi CLI tool for more + information on how to configure MIG instances. + +

+

+ From the profiling perspective, a Compute Instance can be of one of two types: isolated + or shared. + +

+

+ An isolated Compute Instance owns all of its assigned resources and does not share any GPU + unit with another Compute Instance. In other words, the Compute Instance is of the same size + as its parent GPU Instance and consequently does not have any other sibling Compute Instances. + Tracing and Profiling work for isolated Compute Instances. + +

+

+ A shared Compute Instance uses GPU resources that can potentially also be accessed by other + Compute Instances in the same GPU Instance. Due to this resource sharing, collecting + profiling data from shared units is not permitted. Attempts to collect metrics from a shared unit will result + in NaN values. Better error reporting will be done in a future release. Collecting metrics from GPU units that + are exclusively owned by a shared Compute Instance is still possible. + Tracing works for shared Compute Instances. + +

+

+ To allow users to determine which metrics are available on a target device, new APIs have been added which can + be used to query counter availability before starting the profiling session. + See APIs NVPW_RawMetricsConfig_SetCounterAvailability and cuptiProfilerGetCounterAvailability. + +

+

+ All Compute Instances on a GPU share the same clock frequencies. To get consistent metric values with + multi-pass collection, it is recommended to lock the GPU clocks during the profiling session. + CLI tool nvidia-smi can be used to configure a fixed frequency for the whole GPU by calling + nvidia-smi --lock-gpu-clocks=tdp,tdp. This sets the GPU clocks to the base TDP frequency until + you reset the clocks by calling nvidia-smi --reset-gpu-clocks. + +

+
+
+
+
+

4.2. NVIDIA Virtual GPU (vGPU)

+
+
+

+ CUPTI supports tracing and profiling features on NVIDIA virtual GPUs (vGPUs). + Activity, Callback and Profiling APIs are supported but Event and Metric APIs are not supported + on NVIDIA vGPUs. + If you want to use profiling features that NVIDIA vGPU supports, you must enable them + for each vGPU VM that requires them. These can be enabled by setting a vGPU plugin + parameter enable_profiling. How to set the parameter for a vGPU VM depends + on the hypervisor that you are using. + Tracing is enabled by default, it doesn't require any specific setting. However tracing results + might not be accurate after virtual machine (VM) migration. Therefore it is recommended to set the vGPU + plugin parameter enable_profiling for accurate results. + Refer to the NVIDIA Virtual GPU Software documentation for + the list of supported GPUs, + how to enable profiling features using the vGPU plugin parameter and for + limitations on use of CUPTI with NVIDIA vGPU. + +

+
+
+
+
+

4.3. Windows Subsystem for Linux (WSL)

+
+
+

+ WSL or Windows Subsystem for Linux is a Windows feature that enables users to run native + Linux applications, containers and command-line tools directly on Windows 11 and later OS builds. + CUPTI supports tracing APIs Activity and Callback on the second generation of + WSL (WSL 2) on Volta and later GPU architectures. + Profiler APIs Event, Metric, Profiling and PC Sampling are not + supported on WSL. + +

+
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/release_notes.html b/doc/Cupti/release_notes.html new file mode 100644 index 0000000000000000000000000000000000000000..cd09516a216c11c24443bc20f7cda6ef4cf085d7 --- /dev/null +++ b/doc/Cupti/release_notes.html @@ -0,0 +1,2415 @@ + + + + + + + + + + + + + + + + + + + CUPTI :: CUPTI Documentation + + + --> + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+

1. Release Notes

+
+

CUPTI Release Notes.

+

+ Release notes, including new features and important bug fixes. + Supported platforms and GPUs. + +

+
+
+

Release Notes

+
+
+

1.1.1. Updates in CUDA 11.8

+
+
+
New Features
    +
  • CUPTI adds tracing and profiling support for devices with compute capability 9.0 i.e. NVIDIA H100 GPUs + and systems that are based on H100 GPUs. +
  • +
  • Added new fields clusterX, clusterY, clusterZ and + clusterSchedulingPolicy to output the Thread Block Cluster dimensions and scheduling + policy. These fields are valid for devices with compute capability 9.0 and higher. + To accommodate this change, activity record CUpti_ActivityKernel7 is deprecated and + replaced by a new activity record CUpti_ActivityKernel8. +
  • +
  • A new activity kind CUPTI_ACTIVITY_KIND_JIT and corresponding activity record + CUpti_ActivityJit are introduced to capture the overhead involved in the JIT (just-in-time) + compilation and caching of the PTX or NVVM IR code to the binary code. New record also provides the information + about the size and path of the compute cache where the binary code is stored. + +
  • +
  • PC Sampling API is supported on Tegra platforms - QNX, Linux (aarch64) and Linux (x86_64) (Drive SDK).
  • +
Resolved Issues
    +
  • Resolved an issue that might cause crash when the size of the device buffer is + changed, using the attribute CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, after creation of + the CUDA context. +
  • +
+
+
+
+
+
+

1.1.2. Updates in CUDA 11.7 Update 1

+
+
+
Resolved Issues
    +
  • Resolved an issue for PC Sampling API cuptiPCSamplingGetData + which might not always return all the samples when called after the PC sampling range defined by using + the APIs cuptiPCSamplingStart and cuptiPCSamplingStop. + Remaining samples were delivered in the successive call of the API cuptiPCSamplingGetData + after the next range. +
  • +
  • Disabled tracing of nodes in the CUDA Graph when user enables tracing at the Graph + level using the activity kind CUPTI_ACTIVITY_KIND_GRAPH_TRACE. +
  • +
  • Fixed missing channelID and channelType information + for kernel records. Earlier these fields were populated for CUDA Graph launches only. + +
  • +
+
+
+
+
+
+

1.1.3. Updates in CUDA 11.7

+
+
+
New Features
    +
  • A new activity kind CUPTI_ACTIVITY_KIND_GRAPH_TRACE and activity record + CUpti_ActivityGraphTrace are introduced to represent the execution for a graph without + giving visibility about the execution of its nodes. This is intended to reduce overheads involved in + tracing each node separately. This activity can only be enabled for drivers of version 515 and above. +
  • +
  • A new API cuptiActivityEnableAndDump is added to provide snapshot of certain activities + like device, context, stream, NVLink and PCIe at any point during the profiling session. +
  • +
  • Added sample cupti_correlation + to show correlation between CUDA APIs and corresponding GPU activities. +
  • +
  • Added sample cupti_trace_injection + to show how to build an injection library using the activity and callback APIs which can be used to trace + any CUDA application. +
  • +
Resolved Issues
    +
  • Fixed corruption in the function name for PC Sampling API records.
  • +
  • Fixed incorrect timestamps for GPU activities when user calls the API + cuptiActivityRegisterTimestampCallback in the late CUPTI attach scenario. +
  • +
  • Fixed incomplete records for device to device memcopies in the late CUPTI attach scenario. + This issue manifests mainly when application has a mix of CUDA graph and normal kernel launches. +
  • +
+
+
+
+
+
+

1.1.4. Updates in CUDA 11.6 Update 1

+
+
+
Resolved Issues
    +
  • Fixed hang for the PC Sampling API cuptiPCSamplingStop. This issue + is seen for the PC sampling start and stop resulting in generation of large number of sampling records. +
  • +
  • Fixed timing issue for specific device to device memcpy operations.
  • +
+
+
+
+
+
+

1.1.5. Updates in CUDA 11.6

+
+
+
New Features
    +
  • Two new fields channelID and channelType are added in the activity records + for kernel, memcpy, peer-to-peer memcpy and memset to output the ID and type of the hardware channel on which + these activities happen. + Activity records CUpti_ActivityKernel6, CUpti_ActivityMemcpy4, CUpti_ActivityMemcpyPtoP3 + and CUpti_ActivityMemset3 are deprecated and replaced by new activity records CUpti_ActivityKernel7, + CUpti_ActivityMemcpy5, CUpti_ActivityMemcpyPtoP4 and CUpti_ActivityMemset4. + +
  • +
  • New fields isMigEnabled, gpuInstanceId, computeInstanceId and + migUuid are added in the device activity record to provide MIG information for the MIG enabled GPU. + Activity record CUpti_ActivityDevice3 is deprecated and replaced by a new activity record + CUpti_ActivityDevice4. +
  • +
  • A new field utilizedSize is added in the memory pool and memory activity record to provide the utilized size + of the memory pool. Activity record CUpti_ActivityMemoryPool and CUpti_ActivityMemory2 are deprecated + and replaced by a new activity record CUpti_ActivityMemoryPool2 and CUpti_ActivityMemory3 respectively. +
  • +
  • API cuptiActivityRegisterTimestampCallback and callback function CUpti_TimestampCallbackFunc + are added to register a callback function to obtain timestamp of user's choice instead of using CUPTI provided timestamp + in activity records. + +
  • +
  • Profiling API supports profiling OptiX application.
  • +
Resolved Issues
    +
  • Fixed multi-pass metric collection using the Profiling API in the auto range and kernel replay mode + for CUDA Graph. +
  • +
  • Fixed the performance issue for the PC sampling API cuptiPCSamplingStop. +
  • +
  • Fixed corruption in variable names for OpenACC activity records.
  • +
  • Fixed corruption in the fields of the struct memoryPoolConfig in the activity record CUpti_ActivityMemory3. +
  • +
  • Filled the fields of the struct memoryPoolConfig in the activity record CUpti_ActivityMemory3 + when a memory pointer allocated via memory pool is released using cudaFree CUDA API. +
  • +
+
+
+
+
+
+

1.1.6. Updates in CUDA 11.5 Update 1

+
+
+
Resolved Issues
    +
  • Resolved an issue that causes incorrect range name for NVTX event attributes. + The issue was introduced in CUDA 11.4. +
  • +
  • Made NVTX initialization APIs InitializeInjectionNvtx and + InitializeInjectionNvtx2 thread-safe. +
  • +
+
+
+
+
+
+

1.1.7. Updates in CUDA 11.5

+
+
+
New Features
    +
  • A new API cuptiProfilerDeviceSupported is introduced to expose overall Profiling API + support and specific requirements for a given device. Profiling API must be initialized by calling + cuptiProfilerInitialize before testing device support. +
  • +
  • PC Sampling struct CUpti_PCSamplingData introduces a new field nonUsrKernelsTotalSamples + to provide information about the number of samples collected for all non-user kernels. +
  • +
  • Activity record CUpti_ActivityDevice2 for device information has been deprecated and + replaced by a new activity record CUpti_ActivityDevice3. New record adds a flag isCudaVisible + to indicate whether device is visible to CUDA. +
  • +
  • Activity record CUpti_ActivityNvLink3 for NVLink information has been deprecated and + replaced by a new activity record CUpti_ActivityNvLink4. New record can accommodate NVLink port + information up to a maximum of 32 ports. +
  • +
  • A new CUPTI Checkpoint API is introduced, enabling automatic saving + and restoring of device state, and facilitating development of kernel replay tools. This is helpful for User + Replay mode of the CUPTI Profiling API, but is not limited to use with CUPTI. +
  • +
  • Tracing is supported on the Windows Subsystem for Linux version 2 (WSL2).
  • +
  • CUPTI is not supported on NVIDIA Crypto Mining Processors (CMP). A new error code + CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED is introduced to indicate it. +
  • +
Resolved Issues
    +
  • Resolved an issue that causes crash for tracing of device to device memcopy operations.
  • +
  • Resolved an issue that causes crash for OpenACC activity when it is enabled before other activities.
  • +
+
+
+
+
+
+

1.1.8. Updates in CUDA 11.4 Update 1

+
+
+
Resolved Issues
    +
  • Resolved serialization of CUDA Graph launches for applications which use multiple + threads to launch work. +
  • +
  • Previously, for applications that use CUDA Dynamic Parallelism (CDP), CUPTI detects the + presence of the CDP kernels in the CUDA module. Even if CDP kernels are not called, it fails to trace + the application. There is a change in the behavior, CUPTI now traces all the host launched kernels until + it encounters a host launched kernel which launches child kernels. Subsequent kernels are not traced. + +
  • +
+
+
+
+
+
+

1.1.9. Updates in CUDA 11.4

+
+
+
New Features
    +
  • Profiling APIs support profiling of the CUDA kernel nodes launched by a CUDA Graph. + Auto range profiling with kernel replay mode and user range profiling with user replay and + application replay modes are supported. Other combinations of range profiling and replay modes + are not supported. +
  • +
  • Added support for tracing and profiling on + NVIDIA virtual GPUs + (vGPUs) on an upcoming GRID/vGPU release. +
  • +
  • Added sample profiling_injection + to show how to build injection library using the Profiling API. +
  • +
  • Added sample concurrent_profiling + to show how to retain the kernel concurrency across streams and devices using the Profiling API. +
  • +
Resolved Issues
    +
  • Resolved the issue of not tracing the device to device memcopy nodes in a CUDA Graph.
  • +
  • Fixed the issue of reporting zero size for local memory pool for mempool creation record.
  • +
  • Resolved the issue of non-collection of samples for the default CUDA context for PC Sampling API.
  • +
  • Enabled tracking of all domains and registered strings in NVTX irrespective of + whether the NVTX activity kind or callbacks are enabled. This state tracking is needed for proper working of the tool + which creates these NVTX objects before enabling the NVTX activity kind or callback. +
  • +
+
+
+
+
+
+

1.1.10. Updates in CUDA 11.3

+
+
+
New Features
    +
  • A new set of CUPTI APIs for PC sampling data collection are provided in the header file cupti_pcsampling.h + which support continuous mode data collection without serializing kernel execution and have a lower runtime overhead. + Along with these a utility library is provided in the header file cupti_pcsampling_util.h which has APIs for GPU + assembly to CUDA-C source correlation and for reading and writing the PC sampling data from/to files. + Refer to the section CUPTI PC Sampling API for more details. +
  • +
  • Enum CUpti_PcieGen is extended to include PCIe Gen 5. +
  • +
  • The following functions are deprecated and will be removed in a future release: + +
      +
    • Struct NVPA_MetricsContext and related APIs NVPW_MetricsContext_* from the header nvperf_host.h. + It is recommended to use the struct NVPW_MetricsEvaluator and related APIs NVPW_MetricsEvaluator_* + instead. Profiling API samples have been updated to show how to use these APIs. +
    • +
    • cuptiDeviceGetTimestamp from the header cupti_events.h. +
    • +
    +
  • +
Resolved Issues
    +
  • Overhead reduction for tracing of CUDA memcopies.
  • +
  • To provide normalized timestamps for all activities, CUPTI uses linear interpolation for + conversion from GPU timestamps to CPU timestamps. This method can cause spurious gaps or overlap on the timeline. + CUPTI improves the conversion function to provide more precise timestamps. +
  • +
  • Generate overhead activity record for semaphore pool allocation.
  • +
+
+
+
+
+
+

1.1.11. Updates in CUDA 11.2

+
+
+
New Features
    +
  • A new activity kind CUPTI_ACTIVITY_KIND_MEMORY_POOL and activity record CUpti_ActivityMemoryPool + are introduced to represent the creation, destruction and trimming of a memory pool. Enum CUpti_ActivityMemoryPoolType + lists types of memory pool. +
  • +
  • A new activity kind CUPTI_ACTIVITY_KIND_MEMORY2 and activity record CUpti_ActivityMemory2 + are introduced to provide separate records for memory allocation and release operations. This helps in correlation + of records of these operations to the corresponding CUDA APIs, which otherwise is not possible using the existing + activity record CUpti_ActivityMemory which provides a single record for both the memory operations. +
  • +
  • Added a new pointer field of type CUaccessPolicyWindow in the kernel activity record to provide the + access policy window which specifies a contiguous region of global memory and a persistence property in the L2 cache + for accesses within that region. To accommodate this change, activity record CUpti_ActivityKernel5 is + deprecated and replaced by a new activity record CUpti_ActivityKernel6. + This attribute is not collected by default. To control the collection of launch attributes, a new API + cuptiActivityEnableLaunchAttributes is introduced. +
  • +
  • New attributes CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_PRE_ALLOCATE_VALUE and + CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_PRE_ALLOCATE_VALUE are added in the activity attribute + enum CUpti_ActivityAttribute to set and get the number of device buffers and profiling semaphore pools + which are preallocated for the context. +
  • +
  • CUPTI now allocates profiling buffer for concurrent kernel tracing in the pinned host memory in place of + device memory. This might help in improving the performance of the tracing run. Memory location can be controlled + using the attribute CUPTI_ACTIVITY_ATTR_MEM_ALLOCATION_TYPE_HOST_PINNED of the activity attribute enum + CUpti_ActivityAttribute. +
  • +
  • The compiler generated line information for inlined functions is improved due to which CUPTI can associate + inlined functions with the line information of the function call site that has been inlined. +
  • +
  • Removed support for NVLink performance metrics (nvlrx__* and nvltx__*) from + the Profiling API due to a potential application hang during data collection. The metrics will be added back in a + future CUDA release. +
  • +
Resolved Issues
    +
  • Execution overheads introduced by CUPTI in the tracing path is reduced.
  • +
  • For the concurrent kernel activity kind CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL, + CUPTI instruments the kernel code to collect the timing information. Previously, every kernel in the CUDA module + was instrumented, thus the overhead is proportional to the number of different kernels in the module. This is a + static overhead which happens at the time of loading the CUDA module. + To reduce this overhead, kernels are not instrumented at the module load time, instead a single + instrumentation code is generated at the time of loading the CUDA module and it is applied to each kernel during + the kernel execution, thus avoiding most of the static overhead at the CUDA module load time. + +
  • +
+
+
+
+
+
+

1.1.12. Updates in CUDA 11.1

+
+
+
New Features
    +
  • CUPTI adds tracing and profiling support for the NVIDIA Ampere GPUs with compute capability 8.6.
  • +
  • Added a new field graphId in the activity records for kernel, memcpy, peer-to-peer memcpy and memset + to output the unique ID of the CUDA graph that launches the activity through CUDA graph APIs. + To accommodate this change, activity records CUpti_ActivityMemcpy3, CUpti_ActivityMemcpyPtoP2 and + CUpti_ActivityMemset2 are deprecated and replaced by new activity records CUpti_ActivityMemcpy4, + CUpti_ActivityMemcpyPtoP3 and CUpti_ActivityMemset3. + And kernel activity record CUpti_ActivityKernel5 replaces the padding field with graphId. + Added a new API cuptiGetGraphId to query the unique ID of the CUDA graph. +
  • +
  • Added a new API cuptiActivityFlushPeriod to set the flush period for the worker thread. +
  • +
  • Added support for profiling cooperative kernels using Profiling APIs.
  • +
  • Added NVLink performance metrics (nvlrx__* and nvltx__*) using the Profiling APIs. These metrics are available + on devices with compute capability 7.0, 7.5 and 8.0, and these can be collected at the context level. + Refer to the table Metrics Mapping Table for mapping between earlier CUPTI metrics and the + Perfworks NVLink metrics for devices with compute capability 7.0. +
  • +
Resolved Issues
    +
  • Resolved an issue that causes CUPTI to not return full and completed activity buffers for a long + time, CUPTI now attempts to return buffers early. +
  • +
  • To reduce the runtime overhead, CUPTI wakes up the worker thread based on certain heuristics + instead of waking it up at a regular interval. + New API cuptiActivityFlushPeriod can be used to control the flush period of the worker thread. + This setting overrides the CUPTI heuristics. +
  • +
+
+
+
+
+
+

1.1.13. Updates in CUDA 11.0

+
+
+
New Features
    +
  • CUPTI adds tracing and profiling support for devices with compute capability 8.0 i.e. NVIDIA A100 GPUs + and systems that are based on A100. +
  • +
  • Enhancements for CUDA Graph: + +
      +
    • Support to correlate the CUDA Graph node with the GPU activities: kernel, memcpy, memset. + +
        +
      • Added a new field graphNodeId for Node Id in the activity records for kernel, memcpy, memset and P2P transfers. + Activity records CUpti_ActivityKernel4, CUpti_ActivityMemcpy2, CUpti_ActivityMemset and + CUpti_ActivityMemcpyPtoP are deprecated and replaced by new activity records CUpti_ActivityKernel5, + CUpti_ActivityMemcpy3, CUpti_ActivityMemset2 and CUpti_ActivityMemcpyPtoP2. +
      • +
      • graphNodeId is the unique ID for the graph node. +
      • +
      • graphNodeId can be queried using the new CUPTI API cuptiGetGraphNodeId(). +
      • +
      • Callback CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED is issued between a pair of the API enter and exit callbacks. +
      • +
      +
    • +
    • Introduced new callback CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED to indicate the cloning of the CUDA Graph node. +
    • +
    • Retain CUDA driver performance optimization in case memset node is sandwiched between kernel nodes. + CUPTI no longer disables the conversion of memset nodes into kernel nodes for CUDA graphs. +
    • +
    • Added support for cooperative kernels in CUDA graphs.
    • +
    +
  • +
  • Added support to trace Optix applications. Refer to the Optix Profiling section. +
  • +
  • CUPTI overhead is associated with the thread rather than process. Object kind of the overhead record + CUpti_ActivityOverhead is switched to CUPTI_ACTIVITY_OBJECT_THREAD. +
  • +
  • Added error code CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED to indicate the presence of another + CUPTI subscriber. API cuptiSubscribe() returns the new error code rather than CUPTI_ERROR_MAX_LIMIT_REACHED. +
  • +
  • Added a new enum CUpti_FuncShmemLimitConfig to indicate whether user has opted in for maximum dynamic shared memory size + on devices with compute capability 7.x by using function attributes CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES + or cudaFuncAttributeMaxDynamicSharedMemorySize with CUDA driver and runtime respectively. + Field shmemLimitConfig in the kernel activity record CUpti_ActivityKernel5 shows the user choice. + This helps in correct occupancy calculation. Value FUNC_SHMEM_LIMIT_OPTIN in the enum cudaOccFuncShmemConfig + is the corresponding option in the CUDA occupancy calculator. +
  • +
Resolved Issues
    +
  • Resolved an issue that causes incorrect or stale timing for memcopy and serial kernel activities.
  • +
  • Overhead for PC Sampling Activity APIs is reduced by avoiding the reconfiguration of the GPU when PC sampling period doesn't + change between successive kernels. This is applicable for devices with compute capability 7.0 and higher. +
  • +
  • Fixed issues in the API cuptiFinalize() including the issue which may cause the application to crash. + This API provides ability for safe and full detach of CUPTI during the execution of the application. + More details in the section Dynamic Detach. + +
  • +
+
+
+
+
+
+

1.1.14. Updates in CUDA 10.2

+
+
+
New Features
    +
  • CUPTI allows tracing features for non-root and non-admin users on desktop platforms. + Note that events and metrics profiling is still restricted for non-root and non-admin users. + More details about the issue and the solutions can be found on this + web page. +
  • +
  • CUPTI no longer turns off the performance characteristics of CUDA Graph when tracing + the application. +
  • +
  • CUPTI now shows memset nodes in the CUDA graph.
  • +
  • Fixed the incorrect timing issue for the asynchronous cuMemset/cudaMemset activity.
  • +
  • Several performance improvements are done in the tracing path.
  • +
+
+
+
+
+
+

1.1.15. Updates in CUDA 10.1 Update 2

+
+
+
New Features
    +
  • This release is focused on bug fixes and stability of the CUPTI.
  • +
  • A security vulnerability issue required profiling tools to disable all the features + for non-root or non-admin users. As a result, CUPTI cannot profile the application when + using a Windows 419.17 or Linux 418.43 or later driver. More details about the issue + and the solutions can be found on this + web page. +
  • +
+
+
+
+
+
+

1.1.16. Updates in CUDA 10.1 Update 1

+
+
+
New Features
    +
  • Support for the IBM POWER platform is added for the + +
      +
    • Profiling APIs in the header cupti_profiler_target.h
    • +
    • Perfworks metric APIs in the headers nvperf_host.h and nvperf_target.h
    • +
    +
  • +
+
+
+
+
+
+

1.1.17. Updates in CUDA 10.1

+
+
+
New Features
    +
  • This release is focused on bug fixes and performance improvements. + +
  • +
  • The new set of profiling APIs and Perfworks metric APIs which were introduced + in the CUDA Toolkit 10.0 are now integrated into the CUPTI library distributed in + the CUDA Toolkit. Refer to the sections + CUPTI Profiling API and + Perfworks Metric APIs for documentation + of the new APIs. + +
  • +
  • Event collection mode CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS is now + supported on all device classes including Geforce and Quadro. + +
  • +
  • Support for the NVTX string registration API nvtxDomainRegisterStringA().
  • +
  • Added enum CUpti_PcieGen to list PCIe generations. + +
  • +
+
+
+
+
+
+

1.1.18. Updates in CUDA 10.0

+
+
+
New Features
    +
  • Added tracing support for devices with compute capability 7.5.
  • +
  • A new set of metric APIs are added for devices with compute capability 7.0 and higher. + These provide low and deterministic profiling overhead on the target system. These APIs + are currently supported only on Linux x86 64-bit and Windows 64-bit platforms. + Refer to the + CUPTI web page for documentation and details to download the package with support for + these new APIs. + Note that both the old and new metric APIs are supported for compute capability 7.0. This is + to enable transition of code to the new metric APIs. But one cannot mix the usage of the old + and new metric APIs. + +
  • +
  • CUPTI supports profiling of OpenMP applications. OpenMP profiling + information is provided in the form of new activity records CUpti_ActivityOpenMp. + New API cuptiOpenMpInitialize is used to initialize profiling for supported + OpenMP runtimes. + +
  • +
  • Activity record for kernel CUpti_ActivityKernel4 provides shared memory size set + by the CUDA driver. + +
  • +
  • Tracing support for CUDA kernels, memcpy and memset nodes launched by a CUDA Graph. + +
  • +
  • Added support for resource callbacks for resources associated with the CUDA Graph. + Refer to enum CUpti_CallbackIdResource for new callback IDs. + +
  • +
+
+
+
+
+
+

1.1.19. Updates in CUDA 9.2

+
+
+
New Features
    +
  • Added support to query PCI devices information which can be used to construct the PCIe topology. + See activity kind CUPTI_ACTIVITY_KIND_PCIE and related activity record CUpti_ActivityPcie. + +
  • +
  • To view and analyze bandwidth of memory transfers over PCIe topologies, new set of metrics + to collect total data bytes transmitted and received through PCIe are added. Those give + accumulated count for all devices in the system. These metrics are collected at the device level + for the entire application. And those are made available for devices with compute capability 5.2 + and higher. + +
  • +
  • CUPTI added support for new metrics: + +
      +
    • Instruction executed for different types of load and store
    • +
    • Total number of cached global/local load requests from SM to texture cache
    • +
    • Global atomic/non-atomic/reduction bytes written to L2 cache from texture cache
    • +
    • Surface atomic/non-atomic/reduction bytes written to L2 cache from texture cache
    • +
    • Hit rate at L2 cache for all requests from texture cache
    • +
    • Device memory (DRAM) read and write bytes
    • +
    • The utilization level of the multiprocessor function units that execute tensor core + instructions for devices with compute capability 7.0 +
    • +
    +
  • +
  • A new attribute CUPTI_EVENT_ATTR_PROFILING_SCOPE is added under + enum CUpti_EventAttribute to query the profiling scope of an event. + Profiling scope indicates if the event can be collected at the context + level or device level or both. See Enum CUpti_EventProfilingScope for + available profiling scopes. + +
  • +
  • A new error code CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED is added to + indicate that tracing and profiling on virtualized GPU is not supported. + +
  • +
+
+
+
+
+
+

1.1.20. Updates in CUDA 9.1

+
+
+
New Features
    +
  • Added a field for correlation ID in the activity record + CUpti_ActivityStream. + +
  • +
+
+
+
+
+
+

1.1.21. Updates in CUDA 9.0

+
+
+
New Features
    +
  • CUPTI extends tracing and profiling support for devices with compute + capability 7.0. + +
  • +
  • Usage of compute device memory can be tracked through CUPTI. A new activity record + CUpti_ActivityMemory and activity kind CUPTI_ACTIVITY_KIND_MEMORY are + added to track the allocation and freeing of memory. This activity record includes + fields like virtual base address, size, PC (program counter), timestamps for memory + allocation and free calls. + +
  • +
  • Unified memory profiling adds new events for thrashing, throttling, remote map and + device-to-device migration on 64 bit Linux platforms. New events are added + under enum CUpti_ActivityUnifiedMemoryCounterKind. + Enum CUpti_ActivityUnifiedMemoryRemoteMapCause lists possible causes for remote map events. + +
  • +
  • PC sampling supports wide range of sampling periods ranging from 2^5 cycles to 2^31 cycles + per sample. This can be controlled through new field samplingPeriod2 in the PC sampling + configuration struct CUpti_ActivityPCSamplingConfig. + +
  • +
  • Added API cuptiDeviceSupported() to check support for a compute device. + +
  • +
  • Activity record CUpti_ActivityKernel3 for kernel execution has been deprecated + and replaced by new activity record CUpti_ActivityKernel4. New record gives + information about queued and submit timestamps which can help to determine software + and hardware latencies associated with the kernel launch. These timestamps are not collected + by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. + New field launchType of type CUpti_ActivityLaunchType can be used to + determine if it is a cooperative CUDA kernel launch. + +
  • +
  • Activity record CUpti_ActivityPCSampling2 for PC sampling has been deprecated + and replaced by new activity record CUpti_ActivityPCSampling3. New record accommodates + 64-bit PC Offset supported on devices of compute capability 7.0 and higher. + +
  • +
  • Activity record CUpti_ActivityNvLink for NVLink attributes has been deprecated + and replaced by new activity record CUpti_ActivityNvLink2. New record accommodates + increased port numbers between two compute devices. + +
  • +
  • Activity record CUpti_ActivityGlobalAccess2 for source level global accesses + has been deprecated and replaced by new activity record CUpti_ActivityGlobalAccess3. + New record accommodates 64-bit PC Offset supported on devices of compute capability 7.0 and higher. + +
  • +
  • New attributes CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE and + CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_LIMIT are added in the activity attribute + enum CUpti_ActivityAttribute to set and get the profiling semaphore pool size and + the pool limit. + +
  • +
+
+
+
+
+
+

1.1.22. Updates in CUDA 8.0

+
+
+
New Features
    +
  • Sampling of the program counter (PC) is enhanced to point out the true + latency issues, it indicates if the stall reasons for warps are actually causing + stalls in the issue pipeline. Field latencySamples of new activity record + CUpti_ActivityPCSampling2 provides true latency samples. This field is valid + for devices with compute capability 6.0 and higher. + See section PC Sampling for more details. + +
  • +
  • Support for NVLink topology information such as the pair of devices connected via NVLink, + peak bandwidth, memory access permissions etc. is provided through new activity record + CUpti_ActivityNvLink. NVLink performance metrics are provided for data transmitted/received, + transmit/receive throughput and respective header overhead for each physical link. + See section NVLink for more details. + +
  • +
  • CUPTI supports profiling of OpenACC applications. OpenACC profiling + information is provided in the form of new activity records CUpti_ActivityOpenAccData, + CUpti_ActivityOpenAccLaunch and CUpti_ActivityOpenAccOther. + This aids in correlating OpenACC constructs on the CPU with the corresponding + activity taking place on the GPU, and mapping it back to the source code. + New API cuptiOpenACCInitialize is used to initialize profiling for supported OpenACC runtimes. + See section OpenACC for more details. + +
  • +
  • Unified memory profiling provides GPU page fault events on devices with + compute capability 6.0 and 64 bit Linux platforms. Enum CUpti_ActivityUnifiedMemoryAccessType + lists memory access types for GPU page fault events and enum CUpti_ActivityUnifiedMemoryMigrationCause + lists migration causes for data transfer events. + +
  • +
  • Unified Memory profiling support is extended to Mac platform. + +
  • +
  • Support for 16-bit floating point (FP16) data format profiling. New metrics inst_fp_16, + flop_count_hp_add, flop_count_hp_mul, flop_count_hp_fma, flop_count_hp, flop_hp_efficiency, + half_precision_fu_utilization are supported. + Peak FP16 flops per cycle for device can be queried using the enum + CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE added to CUpti_DeviceAttribute. + +
  • +
  • Added new activity kinds CUPTI_ACTIVITY_KIND_SYNCHRONIZATION, + CUPTI_ACTIVITY_KIND_STREAM and CUPTI_ACTIVITY_KIND_CUDA_EVENT, + to support the tracing of CUDA synchronization constructs such as context, stream and + CUDA event synchronization. Synchronization details are provided in the form of new activity + record CUpti_ActivitySynchronization. Enum CUpti_ActivitySynchronizationType + lists different types of CUDA synchronization constructs. + +
  • +
  • APIs cuptiSetThreadIdType()/cuptiGetThreadIdType() to set/get + the mechanism used to fetch the thread-id used in CUPTI records. + Enum CUpti_ActivityThreadIdType lists all supported mechanisms. + +
  • +
  • Added API cuptiComputeCapabilitySupported() to check the support for a specific + compute capability by the CUPTI. + +
  • +
  • Added support to establish correlation between an external API (such as OpenACC, OpenMP) + and CUPTI API activity records. APIs cuptiActivityPushExternalCorrelationId() and + cuptiActivityPopExternalCorrelationId() should be used to push and pop external + correlation ids for the calling thread. Generated records of type + CUpti_ActivityExternalCorrelation contain both external and CUPTI assigned correlation ids. + +
  • +
  • Added containers to store the information of events and metrics in the form of activity records + CUpti_ActivityInstantaneousEvent, CUpti_ActivityInstantaneousEventInstance, + CUpti_ActivityInstantaneousMetric and CUpti_ActivityInstantaneousMetricInstance. + These activity records are not produced by the CUPTI, these are included for completeness and + ease-of-use. Profilers built on top of CUPTI that sample events may choose to use these records + to store the collected event data. + +
  • +
  • Support for domains and annotation of synchronization objects added in NVTX v2. + New activity record CUpti_ActivityMarker2 and enums to indicate various + stages of synchronization object i.e. + CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE, + CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_SUCCESS, + CUPTI_ACTIVITY_FLAG_MARKER_SYNC_ACQUIRE_FAILED and + CUPTI_ACTIVITY_FLAG_MARKER_SYNC_RELEASE are added. + +
  • +
  • Unused field runtimeCorrelationId of the activity record CUpti_ActivityMemset + is broken into two fields flags and memoryKind to indicate the asynchronous + behaviour and the kind of the memory used for the memset operation. + It is supported by the new flag CUPTI_ACTIVITY_FLAG_MEMSET_ASYNC added in + the enum CUpti_ActivityFlag. + +
  • +
  • Added flag CUPTI_ACTIVITY_MEMORY_KIND_MANAGED in the enum CUpti_ActivityMemoryKind + to indicate managed memory. + +
  • +
  • API cuptiGetStreamId has been deprecated. A new API cuptiGetStreamIdEx is + introduced to provide the stream id based on the legacy or per-thread default stream flag. + +
  • +
+
+
+
+
+
+

1.1.23. Updates in CUDA 7.5

+
+
+
New Features
    +
  • Device-wide sampling of the program counter (PC) is enabled by default. + This was a preview feature in the CUDA Toolkit 7.0 release and it was not enabled by default. + +
  • +
  • Ability to collect all events and metrics accurately in presence of multiple contexts on + the GPU is extended for devices with compute capability 5.x. + +
  • +
  • API cuptiGetLastError is introduced to return the last error that has been + produced by any of the CUPTI API calls or the callbacks in the same host thread. + +
  • +
  • Unified memory profiling is supported with MPS (Multi-Process Service) + +
  • +
  • Callback is provided to collect replay information after every kernel run during + kernel replay. See API cuptiKernelReplaySubscribeUpdate and callback type + CUpti_KernelReplayUpdateFunc. + +
  • +
  • Added new attributes in enum CUpti_DeviceAttribute to query maximum shared + memory size for different cache preferences for a device function. + +
  • +
+
+
+
+
+
+

1.1.24. Updates in CUDA 7.0

+
+
+
New Features
    +
  • CUPTI supports device-wide sampling of the program counter (PC). Program counters along + with the stall reasons from all active warps are sampled at a fixed frequency in the round robin order. + Activity record CUpti_ActivityPCSampling enabled using activity kind + CUPTI_ACTIVITY_KIND_PC_SAMPLING outputs stall reason along with PC and other related information. + Enum CUpti_ActivityPCSamplingStallReason lists all the stall reasons. Sampling period is + configurable and can be tuned using API cuptiActivityConfigurePCSampling. + This feature is available on devices with compute capability 5.2. + +
  • +
  • Added new activity record CUpti_ActivityInstructionCorrelation + which can be used to dump source locator records for all the PCs of the function. + +
  • +
  • All events and metrics for devices with compute capability 3.x and 5.0 can be + collected accurately in presence of multiple contexts on the GPU. In previous releases only some + events and metrics could be collected accurately when multiple contexts were executing on the GPU. + +
  • +
  • Unified memory profiling is enhanced by providing fine grain data transfers to and from the GPU, + coupled with more accurate timestamps with each transfer. This information is provided through + new activity record CUpti_ActivityUnifiedMemoryCounter2, deprecating old record + CUpti_ActivityUnifiedMemoryCounter. + +
  • +
  • MPS tracing and profiling support is extended on multi-gpu setups. + +
  • +
  • Activity record CUpti_ActivityDevice for device information has been deprecated + and replaced by new activity record CUpti_ActivityDevice2. New record adds device UUID + which can be used to uniquely identify the device across profiler runs. + +
  • +
  • Activity record CUpti_ActivityKernel2 for kernel execution has been deprecated + and replaced by new activity record CUpti_ActivityKernel3. New record gives + information about Global Partitioned Cache Configuration requested and executed. Partitioned + global caching has an impact on occupancy calculation. If it is ON, then a CTA can only use a + half SM, and thus a half of the registers available per SM. The new fields apply for devices + with compute capability 5.2 and higher. Note that this change was done in CUDA 6.5 release + with support for compute capability 5.2. + +
  • +
+
+
+
+
+
+

1.1.25. Updates in CUDA 6.5

+
+
+
New Features
    +
  • Instruction classification is done for source-correlated Instruction Execution + activity CUpti_ActivityInstructionExecution. + See CUpti_ActivityInstructionClass for instruction classes. + +
  • +
  • Two new device attributes are added to the activity CUpti_DeviceAttribute: + +
      +
    • CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE gives peak single precision + flop per cycle for the GPU. + +
    • +
    • CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE gives peak double precision + flop per cycle for the GPU. + +
    • +
    +
  • +
  • Two new metric properties are added: + +
      +
    • CUPTI_METRIC_PROPERTY_FLOP_SP_PER_CYCLE gives peak single precision + flop per cycle for the GPU. + +
    • +
    • CUPTI_METRIC_PROPERTY_FLOP_DP_PER_CYCLE gives peak double precision + flop per cycle for the GPU. + +
    • +
    +
  • +
  • Activity record CUpti_ActivityGlobalAccess for source level global access + information has been deprecated and replaced by new activity record + CUpti_ActivityGlobalAccess2. New record additionally gives information needed + to map SASS assembly instructions to CUDA C source code. And it also provides ideal L2 + transactions count based on the access pattern. + +
  • +
  • Activity record CUpti_ActivityBranch for source level branch information + has been deprecated and replaced by new activity record CUpti_ActivityBranch2. + New record additionally gives information needed to map SASS assembly instructions + to CUDA C source code. + +
  • +
  • Sample sass_source_map is added to demonstrate the mapping of SASS + assembly instructions to CUDA C source code. + +
  • +
  • Default event collection mode is changed to Kernel (CUPTI_EVENT_COLLECTION_MODE_KERNEL) + from Continuous (CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS). Also Continuous mode + is supported only on Tesla devices. + +
  • +
  • Profiling results might be inconsistent when auto boost is enabled. Profiler tries to + disable auto boost by default, it might fail to do so in some conditions, but profiling + will continue. A new API cuptiGetAutoBoostState is added to query the auto boost + state of the device. This API returns error CUPTI_ERROR_NOT_SUPPORTED + on devices that don't support auto boost. Note that auto boost is supported only on certain + Tesla devices from the Kepler+ family. + +
  • +
  • Activity record CUpti_ActivityKernel2 for kernel execution has been deprecated + and replaced by new activity record CUpti_ActivityKernel3. New record additionally gives + information about Global Partitioned Cache Configuration requested and executed. The new fields + apply for devices with 5.2 Compute Capability. + +
  • +
+
+
+
+
+
+

1.1.26. Updates in CUDA 6.0

+
+
+
New Features
    +
  • Two new CUPTI activity kinds have been introduced to + enable two new types of source-correlated data + collection. The Instruction Execution kind + collects SASS-level instruction execution counts, + divergence data, and predication data. The Shared + Access kind collects source correlated data + indicating inefficient shared memory accesses. + +
  • +
  • CUPTI provides support for CUDA applications + using Unified Memory. A new activity record reports Unified Memory activity + such as transfers to and from a GPU and the number of Unified Memory + related page faults. + +
  • +
  • CUPTI recognizes and reports the special MPS + context that is used by CUDA applications running on a + system with MPS enabled. + +
  • +
  • The activity record + CUpti_ActivityContext has been updated to + introduce a new field into the structure in a + backwards compatible manner. + The 32-bit computeApiKind field was replaced with + two 16 bit fields, computeApiKind and + defaultStreamId. Because all valid + computeApiKind values fit within 16 bits, and because + all supported CUDA platforms are little-endian, persisted + context record data read with the new structure will have the + correct value for computeApiKind and have a value of + zero for defaultStreamId. The CUPTI client is + responsible for versioning the persisted context data to + recognize when the defaultStreamId field is valid. + +
  • +
  • To ensure that metric values are calculated as + accurately as possible, a new metric API is introduced. + Function cuptiMetricGetRequiredEventGroupSets + can be used to get the groups of events that should be + collected at the same time. + +
  • +
  • Execution overheads introduced by CUPTI have been + dramatically decreased. + +
  • +
  • The new activity buffer API introduced in CUDA + Toolkit 5.5 is required. The legacy + cuptiActivityEnqueueBuffer and + cuptiActivityDequeueBuffer functions have been + removed. +
  • +
+
+
+
+
+
+

1.1.27. Updates in CUDA 5.5

+
+
+
New Features
    +
  • Applications that use CUDA Dynamic Parallelism can + be profiled using CUPTI. Device-side kernel launches + are reported using a new activity kind. +
  • +
  • Device attributes such as power usage, clocks, + thermals, etc. are reported via a new activity + kind. +
  • +
  • A new activity buffer API uses callbacks to request + and return buffers of activity records. The existing + cuptiActivityEnqueueBuffer and + cuptiActivityDequeueBuffer functions are still + supported but are deprecated and will be removed in a + future release. +
  • +
  • The Event API supports kernel replay so that any + number of events can be collected during a single run of + the application. +
  • +
  • A new metric API cuptiMetricGetValue2 allows + metric values to be calculated for any device, even if + that device is not available on the system. +
  • +
  • CUDA peer-to-peer memory copies are reported + explicitly via the activity API. In previous releases + these memory copies were only partially reported. +
  • +
+
+
+
+
+
+
+

1.2. Known Issues

+
+
+
The following are known issues with the current release. + +
    +
  • A security vulnerability issue required profiling tools to disable features using GPU performance counters + for non-root or non-admin users when using a Windows 419.17 or Linux 418.43 or later driver. + By default, NVIDIA drivers require elevated permissions to access GPU performance counters. + On Tegra platforms, profile as root or using sudo. + On other platforms, you can either start profiling as root or using sudo, or by enabling non-admin profiling. + More details about the issue and the solutions can be found on the ERR_NVGPUCTRPERM + web page. + +
    Note: CUPTI allows tracing features for non-root and non-admin users on desktop platforms only, + Tegra platforms require root or sudo access. +
    +
  • +
  • Profiling results might be inconsistent when auto boost is enabled. + Profiler tries to disable auto boost by default. But it might fail to do + so in some conditions and profiling will continue and results will be + inconsistent. API cuptiGetAutoBoostState() can be used + to query the auto boost state of the device. This API returns error + CUPTI_ERROR_NOT_SUPPORTED on devices that don't support auto boost. + Note that auto boost is supported only on certain Tesla devices with + compute capability 3.0 and higher. + +
  • +
  • CUPTI doesn't populate the activity structures which are deprecated, + instead the newer version of the activity structure is filled with the information. + +
  • +
  • Because of the low resolution of the timer on Windows, the start and end timestamps can be same + for activities having short execution duration on Windows. +
  • +
  • The application which calls CUPTI APIs cannot be used with Nvidia tools like nvprof, + Nvidia Visual Profiler, Nsight Compute, Nsight Systems, + Nvidia Nsight Visual Studio Edition, cuda-gdb and cuda-memcheck. +
  • +
  • PCIe and NVLink records, when enabled using the API cuptiActivityEnable, are not + captured when CUPTI is initialized lazily after the CUDA initialization. + API cuptiActivityEnableAndDump can be used to dump the records for these activities + at any point during the profiling session. +
  • +
  • CUPTI fails to profile the OpenACC application when the OpenACC + library linked with the application has missing definition of the OpenACC API routine/s. + This is indicated by the error code CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE. +
  • +
  • OpenACC profiling might fail when OpenACC library is linked statically in the + user application. This happens due to the missing definition of the OpenACC API + routines needed for the OpenACC profiling, as compiler might ignore definitions + for the functions not used in the application. This issue can be mitigated by + linking the OpenACC library dynamically. +
  • +
  • Unified memory profiling is not supported on the ARM architecture.
  • +
  • Profiling a C++ application which overloads the new operator at the global scope and uses + any CUDA APIs like cudaMalloc() or cudaMallocManaged() inside the overloaded new operator will + result in a hang. +
  • +
  • Devices with compute capability 6.0 and higher introduce a new feature, compute + preemption, to give a fair chance to all compute contexts while running long tasks. + With the compute preemption feature: + +
      +
    • If multiple contexts are running in parallel it is possible that long kernels + will get preempted. +
    • +
    • Some kernels may get preempted occasionally due to timeslice expiry for the + context. +
    • +
    + + If kernel has been preempted, the time the kernel spends preempted is still + counted towards kernel duration. + +
    Compute preemption can affect events and metrics collection. The + following are known issues with the current release: + +
      +
    • Events and metrics collection for a MPS client can result in higher counts + than expected on devices with compute capability 7.0 and higher, since MPS + client may get preempted due to termination of another MPS client. +
    • +
    • Events warps_launched and sm_cta_launched and metric inst_per_warp might + provide higher counts than expected on devices with compute capability 6.0 + and higher. Metric unique_warps_launched can be used in place of warps_launched + to get correct count of actual warps launched as it is not affected by + compute preemption. +
    • +
    +
    +

    To avoid compute preemption affecting profiler results try to isolate the context + being profiled: +

    +
      +
    • Run the application on secondary GPU where display is not connected.
    • +
    • On Linux if the application is running on the primary GPU where the display + driver is connected then unload the display driver. +
    • +
    • Run only one process that uses GPU at one time.
    • +
    +
  • +
  • Devices with compute capability 6.0 and higher support demand paging. When the + kernel is scheduled for the first time, all the pages allocated using + cudaMallocManaged and that are required for execution of the kernel are fetched in + the global memory when GPU faults are generated. Profiler requires multiple passes to + collect all the metrics required for kernel analysis. The kernel state needs to be + saved and restored for each kernel replay pass. For devices with compute capability + 6.0 and higher and platforms supporting Unified memory, in the first kernel iteration + the GPU faults will be generated and all pages will be fetched in the global memory. + Second iteration onwards GPU page faults will not occur. This will significantly + affect the memory related events and timing. The time taken from trace will include + the time required to fetch the pages but most of the metrics profiled in multiple + iterations will not include time/cycles required to fetch the pages. This causes + inconsistency in the profiler results. +
  • +
  • When profiling an application that uses CUDA Dynamic Parallelism (CDP) there are + several limitations to the profiling tools. + +
      +
    • CUDA Dynamic Parallelism (CDP) kernel launch tracing has a limitation for devices + with compute capability 7.0 and higher. CUPTI traces all the host launched kernels + until it encounters a host launched kernel which launches child kernels. Subsequent + kernels are not traced. +
    • +
    • CUPTI doesn't report CUDA API calls for device-launched kernels.
    • +
    • CUPTI doesn't report detailed event, metric, and source-level results for + device-launched kernels. Event, metric, and source-level results collected for + CPU-launched kernels will include event, metric, and source-level results for the + entire call-tree of kernels launched from within that kernel. +
    • +
    +
  • +
  • Compilation of samples autorange_profiling and userrange_profiling requires a host + compiler which supports C++11 features. For some g++ compilers, it is required to use + the flag -std=c++11 to turn on C++11 features. +
  • +
  • PC Sampling Activity API is not supported on Tegra platforms, while PC Sampling API is supported on Tegra platforms.
  • +
  • As of CUDA 11.4 and R470 TRD1 driver release, CUPTI is supported in a vGPU environment + which requires a vGPU license. If the license is not obtained after 20 minutes, the + reported performance data including metrics from the GPU will be inaccurate. + This is because of a feature in vGPU environment which reduces performance but retains + functionality as specified + here. + +
  • +
  • CUPTI is not supported on NVIDIA Crypto Mining Processors (CMP). This is reported + using the error code CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED. + For more information, please visit the + web page. +
  • +
+
+
+
+
+

1.2.1. Profiling

+
+
+
The following are common known issues for both the event and metric APIs and the profiling APIs: + +
    +
  • Profiling may significantly change the overall performance characteristics of the application. + Refer to the section CUPTI Overhead for more details. +
  • +
  • Profiling a kernel while other contexts are active on the same device (e.g. X server, or + secondary CUDA or graphics application) can result in varying metric values for L2/FB + (Device Memory) related metrics. + Specifically, L2/FB traffic from non-profiled contexts cannot be excluded from the metric results. + To completely avoid this issue, profile the application on a GPU without secondary contexts + accessing the same device (e.g. no X server on Linux). + +
  • +
  • Profiling is not supported for multidevice cooperative kernels, that is, kernels + launched by using the API functions cudaLaunchCooperativeKernelMultiDevice or + cuLaunchCooperativeKernelMultiDevice. +
  • +
  • Enabling certain events can cause GPU kernels to run longer than the driver's + watchdog time-out limit. In these cases the driver will terminate the GPU + kernel resulting in an application error and profiling data will not be + available. Please disable the driver watchdog time out before profiling such + long running CUDA kernels + +
      +
    • On Linux, setting the X Config option Interactive to false is + recommended. +
    • +
    • For Windows, detailed information about TDR (Timeout Detection and Recovery) + and how to disable it is available at + https://docs.microsoft.com/en-us/windows-hardware/drivers/display/timeout-detection-and-recovery +
    • +
    +
  • +
  • Profiling is not supported on the Windows Subsystem for Linux version 2 (WSL2).
  • +
+
+
+
+
+
1.2.1.1. Event and Metric API
+
+
+
The following are known issues related to Event and Metric API: + +
    +
  • The CUPTI event APIs from the header cupti_events.h + and metric APIs from the header cupti_metrics.h are not + supported for the devices with compute capability 7.5 and higher. + These are replaced by Profiling API + and Perfworks metric API. + Refer to the section Migration to the Profiling API. + +
  • +
  • While collecting events in continuous mode, event reporting may be delayed + i.e. event values may be returned by a later call to readEvent(s) API and the + event values for the last readEvent(s) API may get lost. + +
  • +
  • When profiling events, it is possible that the domain instance that gets + profiled gives event value 0 due to absence of workload on the domain instance + since CUPTI profiles one instance of the domain by default. + To profile all instances of the domain, user can set event group attribute + CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES through API + cuptiEventGroupSetAttribute(). + +
  • +
  • Profiling results might be incorrect for CUDA applications compiled with nvcc version + older than 9.0 for devices with compute capability 6.0 and 6.1. Profiling session will + continue and CUPTI will notify it using error code + CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE. It is advised to recompile the + application code with nvcc version 9.0 or later. + Ignore this warning if code is already compiled with the recommended nvcc version. +
  • +
  • For some metrics, the required events can only be collected for a single CUDA + context. For an application that uses multiple CUDA contexts, these metrics + will only be collected for one of the contexts. The metrics that can be + collected only for a single CUDA context are indicated in the metric reference tables. +
  • +
  • Some metric values are calculated assuming a kernel is large enough to occupy + all device multiprocessors with approximately the same amount of work. If a + kernel launch does not have this characteristic, then those metric values may + not be accurate. +
  • +
  • Some events and metrics are not available on all devices. For list of metrics, + you can refer to the metric reference tables. +
  • +
  • CUPTI can give out of memory error for event and metrics profiling, it could be due + to large number of instructions in the kernel. +
  • +
  • Profiling is not supported for CUDA kernel nodes launched by a CUDA Graph.
  • +
  • These APIs are not supported on below system configurations: + +
      +
    • 64-bit ARM Server CPU architecture (arm64 SBSA).
    • +
    • Virtual GPUs (vGPU).
    • +
    +
  • +
+
+
+
+
+
+
1.2.1.2. Profiling and Perfworks Metric API
+
+
+
The following are known issues related to the Profiling and Perfworks Metric API: + +
    +
  • Profiling a kernel while any other GPU work is executing on the same + MIG compute instance can result in varying metric values for all units. Care should be taken to + serialize, or otherwise prevent concurrent CUDA launches within the target application to ensure + those kernels do not influence each other. Be aware that GPU work issued through other APIs in the + target process or workloads created by non-target processes running simultaneously in the same MIG + compute instance will influence the collected metrics. Note that it is acceptable to run CUDA + processes in other MIG compute instances as they will not influence the profiled MIG compute instance. + +
  • +
  • For devices with compute capability 8.0, the NVLink topology information is available + but NVLink performance metrics (nvlrx__* and nvltx__*) + are not supported due to a potential application hang during data collection. +
  • +
  • Profiling is not supported under MPS (Multi-Process Service).
  • +
  • For profiling the CUDA kernel nodes launched by a CUDA Graph, not all combinations + of range profiling and replay modes are supported. User replay and application replay modes + with auto range are not supported. In the user range mode, all the kernel nodes launched + by the CUDA Graph will be profiled, user can't do the profiling for a range of kernels. +
  • +
  • Profiling kernels executed on a device that is part of an SLI group is not supported.
  • +
  • Profiling is not supported for OptiX applications.
  • +
  • Refer to the section Migration to the Profiling API for differences from event and metric APIs. +
  • +
+
+
+
+
+
+
+
+

1.3. Support

+
+

+ Information on supported platforms and GPUs. + +

+
+
+

1.3.1. Platform Support

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1. Platforms supported by CUPTI
PlatformSupport
WindowsYes
Windows Subsystem for Linux version 2 (WSL2)Yes*
Linux (x86_64)Yes
Linux (ppc64le)Yes
Linux (aarch64 sbsa)Yes*
Linux (x86_64) (Drive SDK)Yes*
Linux (aarch64)Yes*
QNXYes*
Mac OSXNo
AndroidNo
+
+

+ Tracing and profiling of 32-bit processes is not supported. + +

+

+ Event and Metric APIs are not supported on Linux (aarch64 sbsa) and WSL2 platforms. + +

+

+ The PC Sampling API is not supported on WSL2 platform. + +

+

+ The Profiling API is not supported on WSL2 platform. + +

+
+
+
+

1.3.2. GPU Support

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2. GPU architectures supported by different CUPTI APIs
CUPTI APISupported GPU architecturesNotes
ActivityKepler and later GPU architectures, i.e. devices with + compute capability 3.5 and higher +  
CallbackKepler and later GPU architectures, i.e. devices with + compute capability 3.5 and higher +  
EventKepler, Maxwell, Pascal, VoltaNot supported on Turing and later GPU architectures, + i.e. devices with compute capability 7.5 and higher +
MetricKepler, Maxwell, Pascal, VoltaNot supported on Turing and later GPU architectures, + i.e. devices with compute capability 7.5 and higher +
ProfilingVolta and later GPU architectures, i.e. devices with + compute capability 7.0 and higher + Not supported on Kepler, Maxwell and Pascal GPUs
PC SamplingVolta and later GPU architectures, i.e. devices with + compute capability 7.0 and higher + Not supported on Kepler, Maxwell and Pascal GPUs
CheckpointVolta and later GPU architectures, i.e. devices with + compute capability 7.0 and higher + Not supported on Kepler, Maxwell and Pascal GPUs
+
+
+
+
+
+ +
+ +
+
+
+ + + + + + \ No newline at end of file diff --git a/doc/Cupti/structBufferInfo.html b/doc/Cupti/structBufferInfo.html new file mode 100644 index 0000000000000000000000000000000000000000..13c0f78c07a70922d34f39a18b8582f9dce643db --- /dev/null +++ b/doc/Cupti/structBufferInfo.html @@ -0,0 +1,108 @@ + + +Cupti: BufferInfo Struct Reference + + + + + +
+

BufferInfo Struct Reference
+ +[CUPTI PC Sampling Utility API] +

BufferInfo will be stored in the file for every buffer, i.e. for every call of UtilDumpPcSamplingBufferInFile() API. + +

+ + + + + + + + + + + +

Data Fields

uint64_t bufferByteSize
uint64_t numSelectedStallReasons
size_t numStallReasons
uint64_t recordCount
+


Field Documentation

+ +
+
+ + + + +
uint64_t BufferInfo::bufferByteSize
+
+
+ +

+Buffer size in Bytes. +

+

+ +

+ +
+ +

+Total number of stall reasons in single record. +

+

+ +

+
+ + + + +
size_t BufferInfo::numStallReasons
+
+
+ +

+Count of all stall reasons supported on the GPU +

+

+ +

+
+ + + + +
uint64_t BufferInfo::recordCount
+
+
+ +

+Total number of PC records. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetBufferInfoParams.html b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetBufferInfoParams.html new file mode 100644 index 0000000000000000000000000000000000000000..bf5840facf5387c99f8fe997a198b60b6e73baf9 --- /dev/null +++ b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetBufferInfoParams.html @@ -0,0 +1,93 @@ + + +Cupti: CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams Struct Reference + + + + + +
+

CUPTI::PcSamplingUtil::CUptiUtil_GetBufferInfoParams Struct Reference
+ +[CUPTI PC Sampling Utility API] +

Params for CuptiUtilGetBufferInfo. + +

+ + + + + + + + + +

Data Fields

BufferInfo bufferInfoData
std::ifstream * fileHandler
size_t size
+


Field Documentation

+ +

+ +

+ +
+ +

+File handle. +

+

+ +

+ +
+ +

+Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetHeaderDataParams.html b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetHeaderDataParams.html new file mode 100644 index 0000000000000000000000000000000000000000..2bc1d908ce73df85a4851e82ef5ba7666b2e0876 --- /dev/null +++ b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetHeaderDataParams.html @@ -0,0 +1,93 @@ + + +Cupti: CUPTI::PcSamplingUtil::CUptiUtil_GetHeaderDataParams Struct Reference + + + + + +
+

CUPTI::PcSamplingUtil::CUptiUtil_GetHeaderDataParams Struct Reference
+ +[CUPTI PC Sampling Utility API] +

Params for CuptiUtilGetHeaderData. + +

+ + + + + + + + + +

Data Fields

std::ifstream * fileHandler
Header headerInfo
size_t size
+


Field Documentation

+ +
+ +
+ +

+File handle. +

+

+ +

+ +

+ +
+ +

+Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetPcSampDataParams.html b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetPcSampDataParams.html new file mode 100644 index 0000000000000000000000000000000000000000..b4c2fbec962a818c81dd9e86b70e9dad55167bde --- /dev/null +++ b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__GetPcSampDataParams.html @@ -0,0 +1,178 @@ + + +Cupti: CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams Struct Reference + + + + + +
+

CUPTI::PcSamplingUtil::CUptiUtil_GetPcSampDataParams Struct Reference
+ +[CUPTI PC Sampling Utility API] +

Params for CuptiUtilGetPcSampData. + +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

PcSamplingBufferType bufferType
std::ifstream * fileHandler
size_t numAttributes
BufferInfo * pBufferInfoData
CUpti_PCSamplingConfigurationInfo * pPCSamplingConfigurationInfo
PcSamplingStallReasons * pPcSamplingStallReasons
void * pSamplingData
size_t size
+


Field Documentation

+ +
+ +
+ +

+Type of buffer to store in file +

+

+ +

+ +
+ +

+File handle. +

+

+ +

+ +
+ +

+Number of configuration attributes +

+

+ +

+ +
+ +

+Pointer to collected buffer info using CuptiUtilGetBufferInfo +

+

+ +

+ +

+ +
+ +

+Refer to PcSamplingStallReasons. For the stallReasons field of PcSamplingStallReasons, memory is expected to be allocated for each string element of the array. +

+

+ +

+ +
+ +

+Pointer to allocated memory to store retrieved data from file. +

+

+ +

+ +
+ +

+Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__MergePcSampDataParams.html b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__MergePcSampDataParams.html new file mode 100644 index 0000000000000000000000000000000000000000..df075a68d9bb14547812eef7c772731ce4278b72 --- /dev/null +++ b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__MergePcSampDataParams.html @@ -0,0 +1,127 @@ + + +Cupti: CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams Struct Reference + + + + + +
+

CUPTI::PcSamplingUtil::CUptiUtil_MergePcSampDataParams Struct Reference
+ +[CUPTI PC Sampling Utility API] +

Params for CuptiUtilMergePcSampData. + +

+ + + + + + + + + + + + + +

Data Fields

CUpti_PCSamplingData ** MergedPcSampDataBuffers
size_t numberOfBuffers
size_t * numMergedBuffer
CUpti_PCSamplingDataPcSampDataBuffer
size_t size
+


Field Documentation

+ +
+ +
+ +

+Pointer to array of merged buffers as per the range id. +

+

+ +

+ +
+ +

+Number of buffers to merge. +

+

+ +

+ +
+ +

+Number of merged buffers. +

+

+ +

+ +
+ +

+Pointer to array of buffers to merge +

+

+ +

+ +
+ +

+Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__PutPcSampDataParams.html b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__PutPcSampDataParams.html new file mode 100644 index 0000000000000000000000000000000000000000..a75cb4a73891ab47e79ac80ac7ee4c5c98ec70f7 --- /dev/null +++ b/doc/Cupti/structCUPTI_1_1PcSamplingUtil_1_1CUptiUtil__PutPcSampDataParams.html @@ -0,0 +1,161 @@ + + +Cupti: CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams Struct Reference + + + + + +
+

CUPTI::PcSamplingUtil::CUptiUtil_PutPcSampDataParams Struct Reference
+ +[CUPTI PC Sampling Utility API] +

Params for CuptiUtilPutPcSampData. + +

+ + + + + + + + + + + + + + + + + +

Data Fields

PcSamplingBufferType bufferType
const char * fileName
size_t numAttributes
CUpti_PCSamplingConfigurationInfopPCSamplingConfigurationInfo
PcSamplingStallReasonspPcSamplingStallReasons
void * pSamplingData
size_t size
+


Field Documentation

+ +
+ +
+ +

+Type of buffer to store in file +

+

+ +

+ +
+ +

+Name of the file into which the buffer is stored. +

+

+ +

+ +
+ +

+Number of configured attributes +

+

+ +

+ +
+ +

+Refer to CUpti_PCSamplingConfigurationInfo. It is expected to provide configuration details of at least the CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute. +

+

+ +

+ +

+ +
+ +

+PC sampling buffer. +

+

+ +

+ +
+ +

+Size of the data structure, i.e. CUpti_PCSamplingDisableParamsSize. The CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Activity.html b/doc/Cupti/structCUpti__Activity.html new file mode 100644 index 0000000000000000000000000000000000000000..c9fc9e82833694dccc1af8ad00fe401d9a9356f4 --- /dev/null +++ b/doc/Cupti/structCUpti__Activity.html @@ -0,0 +1,61 @@ + + +Cupti: CUpti_Activity Struct Reference + + + + + +
+

CUpti_Activity Struct Reference
+ +[CUPTI Activity API] +

The base activity record. +More... +

+ + + + + +

Data Fields

CUpti_ActivityKind kind
+


Detailed Description

+The activity API uses a CUpti_Activity as a generic representation for any activity. The 'kind' field is used to determine the specific activity kind, and from that the CUpti_Activity object can be cast to the specific activity record type appropriate for that kind.

+Note that all activity record types are padded and aligned to ensure that each member of the record is naturally aligned.

+

See also:
CUpti_ActivityKind
+

Field Documentation

+ +
+ +
+ +

+The kind of this activity. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityAPI.html b/doc/Cupti/structCUpti__ActivityAPI.html new file mode 100644 index 0000000000000000000000000000000000000000..b77f470c1610c812d51263ceecba7a49f93e27a3 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityAPI.html @@ -0,0 +1,177 @@ + + +Cupti: CUpti_ActivityAPI Struct Reference + + + + + +
+

CUpti_ActivityAPI Struct Reference
+ +[CUPTI Activity API] +

The activity record for a driver or runtime API invocation. +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

CUpti_CallbackId cbid
uint32_t correlationId
uint64_t end
CUpti_ActivityKind kind
uint32_t processId
uint32_t returnValue
uint64_t start
uint32_t threadId
+


Detailed Description

+This activity record represents an invocation of a driver or runtime API (CUPTI_ACTIVITY_KIND_DRIVER and CUPTI_ACTIVITY_KIND_RUNTIME).

Field Documentation

+ +
+ +
+ +

+The ID of the driver or runtime function. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityAPI::correlationId
+
+
+ +

+The correlation ID of the driver or runtime CUDA function. Each function invocation is assigned a unique correlation ID that is identical to the correlation ID in the memcpy, memset, or kernel activity record that is associated with this function. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityAPI::end
+
+
+ +

+The end timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the function. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_DRIVER, CUPTI_ACTIVITY_KIND_RUNTIME, or CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityAPI::processId
+
+
+ +

+The ID of the process where the driver or runtime CUDA function is executing. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityAPI::returnValue
+
+
+ +

+The return value for the function. For a CUDA driver function this will be a CUresult value, and for a CUDA runtime function this will be a cudaError_t value. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityAPI::start
+
+
+ +

+The start timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the function. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityAPI::threadId
+
+
+ +

+The ID of the thread where the driver or runtime CUDA function is executing. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityAutoBoostState.html b/doc/Cupti/structCUpti__ActivityAutoBoostState.html new file mode 100644 index 0000000000000000000000000000000000000000..4ecf41e81735888a8bb8f7c4615b89e091433d60 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityAutoBoostState.html @@ -0,0 +1,75 @@ + + +Cupti: CUpti_ActivityAutoBoostState Struct Reference + + + + + +
+

CUpti_ActivityAutoBoostState Struct Reference
+ +[CUPTI Activity API] +

Device auto boost state structure. +More... +

+ + + + + + + +

Data Fields

uint32_t enabled
uint32_t pid
+


Detailed Description

+This structure defines auto boost state for a device. See function cuptiGetAutoBoostState

Field Documentation

+ +
+ +
+ +

+Returned auto boost state. 1 is returned in case auto boost is enabled, 0 otherwise +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityAutoBoostState::pid
+
+
+ +

+Id of process that has set the current boost state. The value will be CUPTI_AUTO_BOOST_INVALID_CLIENT_PID if the user does not have the permission to query process ids or there is an error in querying the process id. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityBranch.html b/doc/Cupti/structCUpti__ActivityBranch.html new file mode 100644 index 0000000000000000000000000000000000000000..a2bef230d510fec9d6dc6f23f1d9e54bab98f636 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityBranch.html @@ -0,0 +1,160 @@ + + +Cupti: CUpti_ActivityBranch Struct Reference + + + + + +
+

CUpti_ActivityBranch Struct Reference
+ +[CUPTI Activity API] +

The activity record for source level result branch. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t diverged
uint32_t executed
CUpti_ActivityKind kind
uint32_t pcOffset
uint32_t sourceLocatorId
uint64_t threadsExecuted
+


Detailed Description

+This activity records the locations of the branches in the source (CUPTI_ACTIVITY_KIND_BRANCH). Branch activities are now reported using the CUpti_ActivityBranch2 activity record.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch::diverged
+
+
+ +

+Number of times this branch diverged +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch::executed
+
+
+ +

+The number of times this instruction was executed per warp. It will be incremented regardless of predicate or condition code. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_BRANCH. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch::pcOffset
+
+
+ +

+The pc offset for the branch. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+This is incremented each time this instruction is executed, by the number of threads that executed it. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityBranch2.html b/doc/Cupti/structCUpti__ActivityBranch2.html new file mode 100644 index 0000000000000000000000000000000000000000..48b8f9b2c69a50143b0cd793015f7baa16ac812d --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityBranch2.html @@ -0,0 +1,194 @@ + + +Cupti: CUpti_ActivityBranch2 Struct Reference + + + + + +
+

CUpti_ActivityBranch2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for source level result branch. +More... +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t diverged
uint32_t executed
uint32_t functionId
CUpti_ActivityKind kind
uint32_t pad
uint32_t pcOffset
uint32_t sourceLocatorId
uint64_t threadsExecuted
+


Detailed Description

+This activity records the locations of the branches in the source (CUPTI_ACTIVITY_KIND_BRANCH).

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch2::diverged
+
+
+ +

+Number of times this branch diverged +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch2::executed
+
+
+ +

+The number of times this instruction was executed per warp. It will be incremented regardless of predicate or condition code. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch2::functionId
+
+
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_BRANCH. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch2::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityBranch2::pcOffset
+
+
+ +

+The pc offset for the branch. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+This is incremented each time this instruction is executed, by the number of threads that executed it. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityCdpKernel.html b/doc/Cupti/structCUpti__ActivityCdpKernel.html new file mode 100644 index 0000000000000000000000000000000000000000..6776e4fdfc49176b9e511ae8b3b355bd9aad8ae8 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityCdpKernel.html @@ -0,0 +1,551 @@ + + +Cupti: CUpti_ActivityCdpKernel Struct Reference + + + + + +
+

CUpti_ActivityCdpKernel Struct Reference
+ +[CUPTI Activity API] +

The activity record for CDP (CUDA Dynamic Parallelism) kernel. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
CUpti_ActivityKind kind
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
uint32_t parentBlockX
uint32_t parentBlockY
uint32_t parentBlockZ
int64_t parentGridId
uint64_t queued
uint16_t registersPerThread
uint8_t sharedMemoryConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint64_t submitted
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a CDP kernel execution.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityCdpKernel::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityCdpKernel::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityCdpKernel::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+ +
+ +

+The timestamp when kernel is marked as completed, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+ +
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityCdpKernel::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityCdpKernel::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityCdpKernel::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel execution is assigned a unique grid ID. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityCdpKernel::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityCdpKernel::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityCdpKernel::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_CDP_KERNEL +

+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityCdpKernel::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+ +
+ +

+The X-dimension of the parent block. +

+

+ +

+ +
+ +

+The Y-dimension of the parent block. +

+

+ +

+ +
+ +

+The Z-dimension of the parent block. +

+

+ +

+ +
+ +

+The grid ID of the parent kernel. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityCdpKernel::queued
+
+
+ +

+The timestamp when kernel is queued up, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time is unknown. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityCdpKernel::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityCdpKernel::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+ +

+ +
+ +

+The timestamp when kernel is submitted to the gpu, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submission time is unknown. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityContext.html b/doc/Cupti/structCUpti__ActivityContext.html new file mode 100644 index 0000000000000000000000000000000000000000..edf0fb0b5cb8caf801d9e146b2b33bf990a9fd8a --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityContext.html @@ -0,0 +1,127 @@ + + +Cupti: CUpti_ActivityContext Struct Reference + + + + + +
+

CUpti_ActivityContext Struct Reference
+ +[CUPTI Activity API] +

The activity record for a context. +More... +

+ + + + + + + + + + + + + +

Data Fields

uint16_t computeApiKind
uint32_t contextId
uint32_t deviceId
CUpti_ActivityKind kind
uint16_t nullStreamId
+


Detailed Description

+This activity record represents information about a context (CUPTI_ACTIVITY_KIND_CONTEXT).

Field Documentation

+ +
+ +
+ +

+The compute API kind.

See also:
CUpti_ActivityComputeApiKind
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityContext::contextId
+
+
+ +

+The context ID. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityContext::deviceId
+
+
+ +

+The device ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_CONTEXT. +

+

+ +

+ +
+ +

+The ID for the NULL stream in this context +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityCudaEvent.html b/doc/Cupti/structCUpti__ActivityCudaEvent.html new file mode 100644 index 0000000000000000000000000000000000000000..75f49785990ab8c2f6b596a1d9317fb4f911b467 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityCudaEvent.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityCudaEvent Struct Reference + + + + + +
+

CUpti_ActivityCudaEvent Struct Reference
+ +[CUPTI Activity API] +

The activity record for CUDA event. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint32_t contextId
uint32_t correlationId
uint32_t eventId
CUpti_ActivityKind kind
uint32_t pad
uint32_t streamId
+


Detailed Description

+This activity is used to track recorded events. (CUPTI_ACTIVITY_KIND_CUDA_EVENT).

Field Documentation

+ +
+ +
+ +

+The ID of the context where the event was recorded. +

+

+ +

+ +
+ +

+The correlation ID of the API to which this result is associated. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityCudaEvent::eventId
+
+
+ +

+A unique event ID to identify the event record. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_CUDA_EVENT. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityCudaEvent::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityCudaEvent::streamId
+
+
+ +

+The compute stream where the event was recorded. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityDevice.html b/doc/Cupti/structCUpti__ActivityDevice.html new file mode 100644 index 0000000000000000000000000000000000000000..a816c10ac459f19601c7fb770ae45880e525dcf4 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityDevice.html @@ -0,0 +1,484 @@ + + +Cupti: CUpti_ActivityDevice Struct Reference + + + + + +
+

CUpti_ActivityDevice Struct Reference
+ +[CUPTI Activity API] +

The activity record for a device. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t computeCapabilityMajor
uint32_t computeCapabilityMinor
uint32_t constantMemorySize
uint32_t coreClockRate
CUpti_ActivityFlag flags
uint64_t globalMemoryBandwidth
uint64_t globalMemorySize
uint32_t id
CUpti_ActivityKind kind
uint32_t l2CacheSize
uint32_t maxBlockDimX
uint32_t maxBlockDimY
uint32_t maxBlockDimZ
uint32_t maxBlocksPerMultiprocessor
uint32_t maxGridDimX
uint32_t maxGridDimY
uint32_t maxGridDimZ
uint32_t maxIPC
uint32_t maxRegistersPerBlock
uint32_t maxSharedMemoryPerBlock
uint32_t maxThreadsPerBlock
uint32_t maxWarpsPerMultiprocessor
const char * name
uint32_t numMemcpyEngines
uint32_t numMultiprocessors
uint32_t numThreadsPerWarp
+


Detailed Description

+This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE). Device activity is now reported using the CUpti_ActivityDevice4 activity record.

Field Documentation

+ +
+ +
+ +

+Compute capability for the device, major number. +

+

+ +

+ +
+ +

+Compute capability for the device, minor number. +

+

+ +

+ +
+ +

+The amount of constant memory on the device, in bytes. +

+

+ +

+ +
+ +

+The core clock rate of the device, in kHz. +

+

+ +

+ +
+ +

+The flags associated with the device.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The global memory bandwidth available on the device, in kBytes/sec. +

+

+ +

+ +
+ +

+The amount of global memory on the device, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice::id
+
+
+ +

+The device ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice::l2CacheSize
+
+
+ +

+The size of the L2 cache on the device, in bytes. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a block. +

+

+ +

+ +
+ +

+Maximum number of blocks that can be present on a multiprocessor at any given time. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice::maxGridDimX
+
+
+ +

+Maximum allowed X dimension for a grid. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice::maxGridDimY
+
+
+ +

+Maximum allowed Y dimension for a grid. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice::maxGridDimZ
+
+
+ +

+Maximum allowed Z dimension for a grid. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice::maxIPC
+
+
+ +

+The maximum "instructions per cycle" possible on each device multiprocessor. +

+

+ +

+ +
+ +

+Maximum number of registers that can be allocated to a block. +

+

+ +

+ +
+ +

+Maximum amount of shared memory that can be assigned to a block, in bytes. +

+

+ +

+ +
+ +

+Maximum number of threads allowed in a block. +

+

+ +

+ +
+ +

+Maximum number of warps that can be present on a multiprocessor at any given time. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityDevice::name
+
+
+ +

+The device name. This name is shared across all activity records representing instances of the device, and so should not be modified. +

+

+ +

+ +
+ +

+Number of memory copy engines on the device. +

+

+ +

+ +
+ +

+Number of multiprocessors on the device. +

+

+ +

+ +
+ +

+The number of threads per warp on the device. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityDevice2.html b/doc/Cupti/structCUpti__ActivityDevice2.html new file mode 100644 index 0000000000000000000000000000000000000000..04e4025f215626a625b8e1e25c4f9f4412973550 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityDevice2.html @@ -0,0 +1,569 @@ + + +Cupti: CUpti_ActivityDevice2 Struct Reference + + + + + +
+

CUpti_ActivityDevice2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for a device. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t computeCapabilityMajor
uint32_t computeCapabilityMinor
uint32_t constantMemorySize
uint32_t coreClockRate
uint32_t eccEnabled
CUpti_ActivityFlag flags
uint64_t globalMemoryBandwidth
uint64_t globalMemorySize
uint32_t id
CUpti_ActivityKind kind
uint32_t l2CacheSize
uint32_t maxBlockDimX
uint32_t maxBlockDimY
uint32_t maxBlockDimZ
uint32_t maxBlocksPerMultiprocessor
uint32_t maxGridDimX
uint32_t maxGridDimY
uint32_t maxGridDimZ
uint32_t maxIPC
uint32_t maxRegistersPerBlock
uint32_t maxRegistersPerMultiprocessor
uint32_t maxSharedMemoryPerBlock
uint32_t maxSharedMemoryPerMultiprocessor
uint32_t maxThreadsPerBlock
uint32_t maxWarpsPerMultiprocessor
const char * name
uint32_t numMemcpyEngines
uint32_t numMultiprocessors
uint32_t numThreadsPerWarp
uint32_t pad
CUuuid uuid
+


Detailed Description

+This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE). Device activity is now reported using the CUpti_ActivityDevice4 activity record.

Field Documentation

+ +
+ +
+ +

+Compute capability for the device, major number. +

+

+ +

+ +
+ +

+Compute capability for the device, minor number. +

+

+ +

+ +
+ +

+The amount of constant memory on the device, in bytes. +

+

+ +

+ +
+ +

+The core clock rate of the device, in kHz. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice2::eccEnabled
+
+
+ +

+ECC enabled flag for device +

+

+ +

+ +
+ +

+The flags associated with the device.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The global memory bandwidth available on the device, in kBytes/sec. +

+

+ +

+ +
+ +

+The amount of global memory on the device, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice2::id
+
+
+ +

+The device ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE. +

+

+ +

+ +
+ +

+The size of the L2 cache on the device, in bytes. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a block. +

+

+ +

+ +
+ +

+Maximum number of blocks that can be present on a multiprocessor at any given time. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a grid. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a grid. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a grid. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice2::maxIPC
+
+
+ +

+The maximum "instructions per cycle" possible on each device multiprocessor. +

+

+ +

+ +
+ +

+Maximum number of registers that can be allocated to a block. +

+

+ +

+ +
+ +

+Maximum number of 32-bit registers available per multiprocessor. +

+

+ +

+ +
+ +

+Maximum amount of shared memory that can be assigned to a block, in bytes. +

+

+ +

+ +
+ +

+Maximum amount of shared memory available per multiprocessor, in bytes. +

+

+ +

+ +
+ +

+Maximum number of threads allowed in a block. +

+

+ +

+ +
+ +

+Maximum number of warps that can be present on a multiprocessor at any given time. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityDevice2::name
+
+
+ +

+The device name. This name is shared across all activity records representing instances of the device, and so should not be modified. +

+

+ +

+ +
+ +

+Number of memory copy engines on the device. +

+

+ +

+ +
+ +

+Number of multiprocessors on the device. +

+

+ +

+ +
+ +

+The number of threads per warp on the device. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice2::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
CUuuid CUpti_ActivityDevice2::uuid
+
+
+ +

+The device UUID. This value is the globally unique immutable alphanumeric identifier of the device. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityDevice3.html b/doc/Cupti/structCUpti__ActivityDevice3.html new file mode 100644 index 0000000000000000000000000000000000000000..dcef82f64d5ef6f004c9592af31e1339457bf9b6 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityDevice3.html @@ -0,0 +1,586 @@ + + +Cupti: CUpti_ActivityDevice3 Struct Reference + + + + + +
+

CUpti_ActivityDevice3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for a device. (CUDA 7.0 onwards). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t computeCapabilityMajor
uint32_t computeCapabilityMinor
uint32_t constantMemorySize
uint32_t coreClockRate
uint32_t eccEnabled
CUpti_ActivityFlag flags
uint64_t globalMemoryBandwidth
uint64_t globalMemorySize
uint32_t id
uint8_t isCudaVisible
CUpti_ActivityKind kind
uint32_t l2CacheSize
uint32_t maxBlockDimX
uint32_t maxBlockDimY
uint32_t maxBlockDimZ
uint32_t maxBlocksPerMultiprocessor
uint32_t maxGridDimX
uint32_t maxGridDimY
uint32_t maxGridDimZ
uint32_t maxIPC
uint32_t maxRegistersPerBlock
uint32_t maxRegistersPerMultiprocessor
uint32_t maxSharedMemoryPerBlock
uint32_t maxSharedMemoryPerMultiprocessor
uint32_t maxThreadsPerBlock
uint32_t maxWarpsPerMultiprocessor
const char * name
uint32_t numMemcpyEngines
uint32_t numMultiprocessors
uint32_t numThreadsPerWarp
uint32_t pad
CUuuid uuid
+


Detailed Description

+This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE). Device activity is now reported using the CUpti_ActivityDevice4 activity record.

Field Documentation

+ +
+ +
+ +

+Compute capability for the device, major number. +

+

+ +

+ +
+ +

+Compute capability for the device, minor number. +

+

+ +

+ +
+ +

+The amount of constant memory on the device, in bytes. +

+

+ +

+ +
+ +

+The core clock rate of the device, in kHz. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice3::eccEnabled
+
+
+ +

+ECC enabled flag for device +

+

+ +

+ +
+ +

+The flags associated with the device.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The global memory bandwidth available on the device, in kBytes/sec. +

+

+ +

+ +
+ +

+The amount of global memory on the device, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice3::id
+
+
+ +

+The device ID. +

+

+ +

+ +
+ +

+Flag to indicate whether the device is visible to CUDA. Users can set the device visibility using the CUDA_VISIBLE_DEVICES environment variable. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE. +

+

+ +

+ +
+ +

+The size of the L2 cache on the device, in bytes. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a block. +

+

+ +

+ +
+ +

+Maximum number of blocks that can be present on a multiprocessor at any given time. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a grid. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a grid. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a grid. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice3::maxIPC
+
+
+ +

+The maximum "instructions per cycle" possible on each device multiprocessor. +

+

+ +

+ +
+ +

+Maximum number of registers that can be allocated to a block. +

+

+ +

+ +
+ +

+Maximum number of 32-bit registers available per multiprocessor. +

+

+ +

+ +
+ +

+Maximum amount of shared memory that can be assigned to a block, in bytes. +

+

+ +

+ +
+ +

+Maximum amount of shared memory available per multiprocessor, in bytes. +

+

+ +

+ +
+ +

+Maximum number of threads allowed in a block. +

+

+ +

+ +
+ +

+Maximum number of warps that can be present on a multiprocessor at any given time. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityDevice3::name
+
+
+ +

+The device name. This name is shared across all activity records representing instances of the device, and so should not be modified. +

+

+ +

+ +
+ +

+Number of memory copy engines on the device. +

+

+ +

+ +
+ +

+Number of multiprocessors on the device. +

+

+ +

+ +
+ +

+The number of threads per warp on the device. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice3::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
CUuuid CUpti_ActivityDevice3::uuid
+
+
+ +

+The device UUID. This value is the globally unique immutable alphanumeric identifier of the device. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityDevice4.html b/doc/Cupti/structCUpti__ActivityDevice4.html new file mode 100644 index 0000000000000000000000000000000000000000..b982388fcc2f4d929a90f474f5990201939c8ea9 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityDevice4.html @@ -0,0 +1,654 @@ + + +Cupti: CUpti_ActivityDevice4 Struct Reference + + + + + +
+

CUpti_ActivityDevice4 Struct Reference
+ +[CUPTI Activity API] +

The activity record for a device. (CUDA 11.6 onwards). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t computeCapabilityMajor
uint32_t computeCapabilityMinor
uint32_t computeInstanceId
uint32_t constantMemorySize
uint32_t coreClockRate
uint32_t eccEnabled
CUpti_ActivityFlag flags
uint64_t globalMemoryBandwidth
uint64_t globalMemorySize
uint32_t gpuInstanceId
uint32_t id
uint8_t isCudaVisible
uint8_t isMigEnabled
CUpti_ActivityKind kind
uint32_t l2CacheSize
uint32_t maxBlockDimX
uint32_t maxBlockDimY
uint32_t maxBlockDimZ
uint32_t maxBlocksPerMultiprocessor
uint32_t maxGridDimX
uint32_t maxGridDimY
uint32_t maxGridDimZ
uint32_t maxIPC
uint32_t maxRegistersPerBlock
uint32_t maxRegistersPerMultiprocessor
uint32_t maxSharedMemoryPerBlock
uint32_t maxSharedMemoryPerMultiprocessor
uint32_t maxThreadsPerBlock
uint32_t maxWarpsPerMultiprocessor
CUuuid migUuid
const char * name
uint32_t numMemcpyEngines
uint32_t numMultiprocessors
uint32_t numThreadsPerWarp
uint32_t pad
CUuuid uuid
+


Detailed Description

+This activity record represents information about a GPU device (CUPTI_ACTIVITY_KIND_DEVICE).

Field Documentation

+ +
+ +
+ +

+Compute capability for the device, major number. +

+

+ +

+ +
+ +

+Compute capability for the device, minor number. +

+

+ +

+ +
+ +

+Compute Instance ID for MIG-enabled devices. If MIG mode is disabled, the value is set to UINT32_MAX. +

+

+ +

+ +
+ +

+The amount of constant memory on the device, in bytes. +

+

+ +

+ +
+ +

+The core clock rate of the device, in kHz. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice4::eccEnabled
+
+
+ +

+ECC enabled flag for device +

+

+ +

+ +
+ +

+The flags associated with the device.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The global memory bandwidth available on the device, in kBytes/sec. +

+

+ +

+ +
+ +

+The amount of global memory on the device, in bytes. +

+

+ +

+ +
+ +

+GPU Instance ID for MIG-enabled devices. If MIG mode is disabled, the value is set to UINT32_MAX. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice4::id
+
+
+ +

+The device ID. +

+

+ +

+ +
+ +

+Flag to indicate whether the device is visible to CUDA. Users can set the device visibility using the CUDA_VISIBLE_DEVICES environment variable. +

+

+ +

+ +
+ +

+MIG enabled flag for device +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE. +

+

+ +

+ +
+ +

+The size of the L2 cache on the device, in bytes. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a block. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a block. +

+

+ +

+ +
+ +

+Maximum number of blocks that can be present on a multiprocessor at any given time. +

+

+ +

+ +
+ +

+Maximum allowed X dimension for a grid. +

+

+ +

+ +
+ +

+Maximum allowed Y dimension for a grid. +

+

+ +

+ +
+ +

+Maximum allowed Z dimension for a grid. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice4::maxIPC
+
+
+ +

+The maximum "instructions per cycle" possible on each device multiprocessor. +

+

+ +

+ +
+ +

+Maximum number of registers that can be allocated to a block. +

+

+ +

+ +
+ +

+Maximum number of 32-bit registers available per multiprocessor. +

+

+ +

+ +
+ +

+Maximum amount of shared memory that can be assigned to a block, in bytes. +

+

+ +

+ +
+ +

+Maximum amount of shared memory available per multiprocessor, in bytes. +

+

+ +

+ +
+ +

+Maximum number of threads allowed in a block. +

+

+ +

+ +
+ +

+Maximum number of warps that can be present on a multiprocessor at any given time. +

+

+ +

+
+ + + + +
CUuuid CUpti_ActivityDevice4::migUuid
+
+
+ +

+The MIG UUID. This value is the globally unique immutable alphanumeric identifier of the device. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityDevice4::name
+
+
+ +

+The device name. This name is shared across all activity records representing instances of the device, and so should not be modified. +

+

+ +

+ +
+ +

+Number of memory copy engines on the device. +

+

+ +

+ +
+ +

+Number of multiprocessors on the device. +

+

+ +

+ +
+ +

+The number of threads per warp on the device. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityDevice4::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
CUuuid CUpti_ActivityDevice4::uuid
+
+
+ +

+The device UUID. This value is the globally unique immutable alphanumeric identifier of the device. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityDeviceAttribute.html b/doc/Cupti/structCUpti__ActivityDeviceAttribute.html new file mode 100644 index 0000000000000000000000000000000000000000..c607789cb9e2d810d4325130f6a874b481a7f058 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityDeviceAttribute.html @@ -0,0 +1,131 @@ + + +Cupti: CUpti_ActivityDeviceAttribute Struct Reference + + + + + +
+

CUpti_ActivityDeviceAttribute Struct Reference
+ +[CUPTI Activity API] +

The activity record for a device attribute. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

union {
attribute
uint32_t deviceId
CUpti_ActivityFlag flags
CUpti_ActivityKind kind
union {
value
+


Detailed Description

+This activity record represents information about a GPU device: either a CUpti_DeviceAttribute or CUdevice_attribute value (CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE).

Field Documentation

+ +
+
+ + + + +
union { ... } CUpti_ActivityDeviceAttribute::attribute
+
+
+ +

+The attribute, either a CUpti_DeviceAttribute or CUdevice_attribute. Flag CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE is used to indicate what kind of attribute this is. If CUPTI_ACTIVITY_FLAG_DEVICE_ATTRIBUTE_CUDEVICE is 1 then the CUdevice_attribute field is valid, otherwise the CUpti_DeviceAttribute field is valid. +

+

+ +

+ +
+ +

+The ID of the device that this attribute applies to. +

+

+ +

+ +
+ +

+The flags associated with the device.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityDeviceAttribute::value
+
+
+ +

+The value for the attribute. See CUpti_DeviceAttribute and CUdevice_attribute for the type of the value for a given attribute. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityEnvironment.html b/doc/Cupti/structCUpti__ActivityEnvironment.html new file mode 100644 index 0000000000000000000000000000000000000000..0506d727cbb4936aec60abb4b1ad5191e8215803 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityEnvironment.html @@ -0,0 +1,350 @@ + + +Cupti: CUpti_ActivityEnvironment Struct Reference + + + + + +
+

CUpti_ActivityEnvironment Struct Reference
+ +[CUPTI Activity API] +

The activity record for CUPTI environmental data. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t deviceId
CUpti_ActivityEnvironmentKind environmentKind
CUpti_ActivityKind kind
uint64_t timestamp
CUpti_EnvironmentClocksThrottleReason clocksThrottleReasons
struct {
   uint32_t   fanSpeed
cooling
uint32_t gpuTemperature
uint32_t memoryClock
uint32_t pcieLinkGen
uint32_t pcieLinkWidth
struct {
   uint32_t   power
   uint32_t   powerLimit
power
uint32_t smClock
struct {
   CUpti_EnvironmentClocksThrottleReason   clocksThrottleReasons
   uint32_t   memoryClock
   uint32_t   pcieLinkGen
   uint32_t   pcieLinkWidth
   uint32_t   smClock
speed
struct {
   uint32_t   gpuTemperature
temperature
+


Detailed Description

+This activity record provides CUPTI environmental data, including power, clocks, and thermals. This information is sampled at various rates and returned in this activity record. The consumer of the record needs to check the environmentKind field to figure out what kind of environmental record this is.

Field Documentation

+ +

+ +

+
+ + + + +
struct { ... } CUpti_ActivityEnvironment::cooling
+
+
+ +

+Data returned for CUPTI_ACTIVITY_ENVIRONMENT_COOLING environment kind. +

+

+ +

+ +
+ +

+The ID of the device +

+

+ +

+ +
+ +

+The kind of data reported in this record. +

+

+ +

+ +
+ +

+The fan speed as percentage of maximum. +

+

+ +

+ +
+ +

+The GPU temperature in degrees C. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_ENVIRONMENT. +

+

+ +

+ +
+ +

+The memory frequency in MHz +

+

+ +

+ +
+ +

+The PCIe link generation. +

+

+ +

+ +
+ +

+The PCIe link width. +

+

+ +

+
+ + + + +
struct { ... } CUpti_ActivityEnvironment::power
+
+
+ +

+Data returned for CUPTI_ACTIVITY_ENVIRONMENT_POWER environment kind. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityEnvironment::power
+
+
+ +

+The power in milliwatts consumed by GPU and associated circuitry. +

+

+ +

+ +
+ +

+The power in milliwatts that will trigger power management algorithm. +

+

+ +

+ +
+ +

+The SM frequency in MHz +

+

+ +

+
+ + + + +
struct { ... } CUpti_ActivityEnvironment::speed
+
+
+ +

+Data returned for CUPTI_ACTIVITY_ENVIRONMENT_SPEED environment kind. +

+

+ +

+
+ + + + +
struct { ... } CUpti_ActivityEnvironment::temperature
+
+
+ +

+Data returned for CUPTI_ACTIVITY_ENVIRONMENT_TEMPERATURE environment kind. +

+

+ +

+ +
+ +

+The timestamp when this sample was retrieved, in ns. A value of 0 indicates that timestamp information could not be collected for the sample. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityEvent.html b/doc/Cupti/structCUpti__ActivityEvent.html new file mode 100644 index 0000000000000000000000000000000000000000..053f66eb732833d757da8bf5a75fb5760ed499cb --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityEvent.html @@ -0,0 +1,126 @@ + + +Cupti: CUpti_ActivityEvent Struct Reference + + + + + +
+

CUpti_ActivityEvent Struct Reference
+ +[CUPTI Activity API] +

The activity record for a CUPTI event. +More... +

+ + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
CUpti_EventDomainID domain
CUpti_EventID id
CUpti_ActivityKind kind
uint64_t value
+


Detailed Description

+This activity record represents a CUPTI event value (CUPTI_ACTIVITY_KIND_EVENT). This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profile frameworks built on top of CUPTI that collect event data may choose to use this type to store the collected event data.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the event. Use of this ID is user-defined, but typically this ID value will equal the correlation ID of the kernel for which the event was gathered. +

+

+ +

+ +
+ +

+The event domain ID. +

+

+ +

+ +
+ +

+The event ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_EVENT. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityEvent::value
+
+
+ +

+The event value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityEventInstance.html b/doc/Cupti/structCUpti__ActivityEventInstance.html new file mode 100644 index 0000000000000000000000000000000000000000..f4c0f3b77c76d812232b4519792d5ff9b361f4ae --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityEventInstance.html @@ -0,0 +1,160 @@ + + +Cupti: CUpti_ActivityEventInstance Struct Reference + + + + + +
+

CUpti_ActivityEventInstance Struct Reference
+ +[CUPTI Activity API] +

The activity record for a CUPTI event with instance information. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
CUpti_EventDomainID domain
CUpti_EventID id
uint32_t instance
CUpti_ActivityKind kind
uint32_t pad
uint64_t value
+


Detailed Description

+This activity record represents a CUPTI event value for a specific event domain instance (CUPTI_ACTIVITY_KIND_EVENT_INSTANCE). This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks built on top of CUPTI that collect event data may choose to use this type to store the collected event data. This activity record should be used when event domain instance information needs to be associated with the event.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the event. Use of this ID is user-defined, but typically this ID value will equal the correlation ID of the kernel for which the event was gathered. +

+

+ +

+ +
+ +

+The event domain ID. +

+

+ +

+ +
+ +

+The event ID. +

+

+ +

+ +
+ +

+The event domain instance. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_EVENT_INSTANCE. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityEventInstance::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The event value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityExternalCorrelation.html b/doc/Cupti/structCUpti__ActivityExternalCorrelation.html new file mode 100644 index 0000000000000000000000000000000000000000..0c890dd2a0aaa2feb633df88557e7eb7c88bcc2d --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityExternalCorrelation.html @@ -0,0 +1,129 @@ + + +Cupti: CUpti_ActivityExternalCorrelation Struct Reference + + + + + +
+

CUpti_ActivityExternalCorrelation Struct Reference
+ +[CUPTI Activity API] +

The activity record for correlation with external records. +More... +

+ + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint64_t externalId
CUpti_ExternalCorrelationKind externalKind
CUpti_ActivityKind kind
uint32_t reserved
+


Detailed Description

+This activity record correlates native CUDA records (e.g. CUDA Driver API, kernels, memcpys, ...) with records from external APIs such as OpenACC. (CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION).

+

See also:
CUpti_ActivityKind
+

Field Documentation

+ +
+ +
+ +

+The correlation ID of the associated CUDA driver or runtime API record. +

+

+ +

+ +
+ +

+The correlation ID of the associated non-CUDA API record. The exact field in the associated external record depends on that record's activity kind (

See also:
externalKind).
+ +
+

+ +

+ +
+ +

+The kind of external API this record correlated to. +

+

+ +

+ +
+ +

+The kind of this activity. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityFunction.html b/doc/Cupti/structCUpti__ActivityFunction.html new file mode 100644 index 0000000000000000000000000000000000000000..13177a184866a7a5d86f37a4dde84b06397fc6b9 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityFunction.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityFunction Struct Reference + + + + + +
+

CUpti_ActivityFunction Struct Reference
+ +[CUPTI Activity API] +

The activity record for global/device functions. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint32_t contextId
uint32_t functionIndex
uint32_t id
CUpti_ActivityKind kind
uint32_t moduleId
const char * name
+


Detailed Description

+This activity records function name and corresponding module information. (CUPTI_ACTIVITY_KIND_FUNCTION).

Field Documentation

+ +
+
+ + + + +
uint32_t CUpti_ActivityFunction::contextId
+
+
+ +

+The ID of the context where the function is launched. +

+

+ +

+ +
+ +

+The function's unique symbol index in the module. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityFunction::id
+
+
+ +

+ID to uniquely identify the record +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_FUNCTION. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityFunction::moduleId
+
+
+ +

+The module ID in which this global/device function is present. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityFunction::name
+
+
+ +

+The name of the function. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityGlobalAccess.html b/doc/Cupti/structCUpti__ActivityGlobalAccess.html new file mode 100644 index 0000000000000000000000000000000000000000..d7890f27608ceec91253dc2f07106cbb93f83869 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityGlobalAccess.html @@ -0,0 +1,177 @@ + + +Cupti: CUpti_ActivityGlobalAccess Struct Reference + + + + + +
+

CUpti_ActivityGlobalAccess Struct Reference
+ +[CUPTI Activity API] +

The activity record for source-level global access. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t executed
CUpti_ActivityFlag flags
CUpti_ActivityKind kind
uint64_t l2_transactions
uint32_t pcOffset
uint32_t sourceLocatorId
uint64_t threadsExecuted
+


Detailed Description

+This activity records the locations of the global accesses in the source (CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS). Global access activities are now reported using the CUpti_ActivityGlobalAccess3 activity record.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The number of times this instruction was executed per warp. It is incremented when at least one thread in the warp is active with predicate and condition code evaluating to true. +

+

+ +

+ +
+ +

+The properties of this global access. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS. +

+

+ +

+ +
+ +

+The total number of 32-byte transactions to L2 cache generated by this access. +

+

+ +

+ +
+ +

+The pc offset for the access. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+Each time this instruction is executed, this value is incremented by the number of threads that executed it with predicate and condition code evaluating to true. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityGlobalAccess2.html b/doc/Cupti/structCUpti__ActivityGlobalAccess2.html new file mode 100644 index 0000000000000000000000000000000000000000..7bbda23e330b576ee5d57fee7c369e028540210b --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityGlobalAccess2.html @@ -0,0 +1,228 @@ + + +Cupti: CUpti_ActivityGlobalAccess2 Struct Reference + + + + + +
+

CUpti_ActivityGlobalAccess2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for source-level global access. (deprecated in CUDA 9.0). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t executed
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint64_t l2_transactions
uint32_t pad
uint32_t pcOffset
uint32_t sourceLocatorId
uint64_t theoreticalL2Transactions
uint64_t threadsExecuted
+


Detailed Description

+This activity records the locations of the global accesses in the source (CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS). Global access activities are now reported using the CUpti_ActivityGlobalAccess3 activity record.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The number of times this instruction was executed per warp. It is incremented when at least one thread in the warp is active with predicate and condition code evaluating to true. +

+

+ +

+ +
+ +

+The properties of this global access. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS. +

+

+ +

+ +
+ +

+The total number of 32-byte transactions to L2 cache generated by this access. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityGlobalAccess2::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The pc offset for the access. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+The minimum number of L2 transactions possible based on the access pattern. +

+

+ +

+ +
+ +

+Each time this instruction is executed, this value is incremented by the number of threads that executed it with predicate and condition code evaluating to true. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityGlobalAccess3.html b/doc/Cupti/structCUpti__ActivityGlobalAccess3.html new file mode 100644 index 0000000000000000000000000000000000000000..3c30767c8fee202f6b861399e526f6446a4fe5d9 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityGlobalAccess3.html @@ -0,0 +1,211 @@ + + +Cupti: CUpti_ActivityGlobalAccess3 Struct Reference + + + + + +
+

CUpti_ActivityGlobalAccess3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for source-level global access. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t executed
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint64_t l2_transactions
uint64_t pcOffset
uint32_t sourceLocatorId
uint64_t theoreticalL2Transactions
uint64_t threadsExecuted
+


Detailed Description

+This activity records the locations of the global accesses in the source (CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS).

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The number of times this instruction was executed per warp. It is incremented when at least one thread in the warp is active with predicate and condition code evaluating to true. +

+

+ +

+ +
+ +

+The properties of this global access. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS. +

+

+ +

+ +
+ +

+The total number of 32-byte transactions to L2 cache generated by this access. +

+

+ +

+ +
+ +

+The pc offset for the access. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+The minimum number of L2 transactions possible based on the access pattern. +

+

+ +

+ +
+ +

+Each time this instruction is executed, this value is incremented by the number of threads that executed it with predicate and condition code evaluating to true. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityGraphTrace.html b/doc/Cupti/structCUpti__ActivityGraphTrace.html new file mode 100644 index 0000000000000000000000000000000000000000..f6de2d8e30cc3d2263ec9a120615f02ffa3d294f --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityGraphTrace.html @@ -0,0 +1,194 @@ + + +Cupti: CUpti_ActivityGraphTrace Struct Reference + + + + + +
+

CUpti_ActivityGraphTrace Struct Reference
+ +[CUPTI Activity API] +

The activity record for trace of graph execution. +More... +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint64_t end
uint32_t graphId
CUpti_ActivityKind kind
void * reserved
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents execution for a graph without giving visibility about the execution of its nodes. This is intended to reduce overheads in tracing each node. The activity kind is CUPTI_ACTIVITY_KIND_GRAPH_TRACE

Field Documentation

+ +
+ +
+ +

+The ID of the context where the graph is being launched. +

+

+ +

+ +
+ +

+The correlation ID of the graph launch. Each graph launch is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the graph. +

+

+ +

+ +
+ +

+The ID of the device where the graph execution is occurring. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityGraphTrace::end
+
+
+ +

+The end timestamp for the graph execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the graph. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityGraphTrace::graphId
+
+
+ +

+The unique ID of the graph that is launched. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_GRAPH_TRACE +

+

+ +

+ +
+ +

+This field is reserved for internal use +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityGraphTrace::start
+
+
+ +

+The start timestamp for the graph execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the graph. +

+

+ +

+ +
+ +

+The ID of the stream where the graph is being launched. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityInstantaneousEvent.html b/doc/Cupti/structCUpti__ActivityInstantaneousEvent.html new file mode 100644 index 0000000000000000000000000000000000000000..9366fc14fea5d09e66f6de92a3e9b082ff48e9a8 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityInstantaneousEvent.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityInstantaneousEvent Struct Reference + + + + + +
+

CUpti_ActivityInstantaneousEvent Struct Reference
+ +[CUPTI Activity API] +

The activity record for an instantaneous CUPTI event. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint32_t deviceId
CUpti_EventID id
CUpti_ActivityKind kind
uint32_t reserved
uint64_t timestamp
uint64_t value
+


Detailed Description

+This activity record represents a CUPTI event value (CUPTI_ACTIVITY_KIND_EVENT) sampled at a particular instant. This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks built on top of CUPTI that collect event data at a particular time may choose to use this type to store the collected event data.

Field Documentation

+ +
+ +
+ +

+The device id +

+

+ +

+ +
+ +

+The event ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The timestamp at which event is sampled +

+

+ +

+ +
+ +

+The event value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityInstantaneousEventInstance.html b/doc/Cupti/structCUpti__ActivityInstantaneousEventInstance.html new file mode 100644 index 0000000000000000000000000000000000000000..dbe35acfa054fe24c48b63735138799c0e0bc2e9 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityInstantaneousEventInstance.html @@ -0,0 +1,160 @@ + + +Cupti: CUpti_ActivityInstantaneousEventInstance Struct Reference + + + + + +
+

CUpti_ActivityInstantaneousEventInstance Struct Reference
+ +[CUPTI Activity API] +

The activity record for an instantaneous CUPTI event with event domain instance information. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t deviceId
CUpti_EventID id
uint8_t instance
CUpti_ActivityKind kind
uint8_t pad [3]
uint64_t timestamp
uint64_t value
+


Detailed Description

+This activity record represents a CUPTI event value for a specific event domain instance (CUPTI_ACTIVITY_KIND_EVENT_INSTANCE) sampled at a particular instant. This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks built on top of CUPTI that collect event data may choose to use this type to store the collected event data. This activity record should be used when event domain instance information needs to be associated with the event.

Field Documentation

+ +
+ +
+ +

+The device id +

+

+ +

+ +
+ +

+The event ID. +

+

+ +

+ +
+ +

+The event domain instance +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The timestamp at which event is sampled +

+

+ +

+ +
+ +

+The event value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityInstantaneousMetric.html b/doc/Cupti/structCUpti__ActivityInstantaneousMetric.html new file mode 100644 index 0000000000000000000000000000000000000000..68e6fd1f20cc9e7e072e2649db2abb1241a79b23 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityInstantaneousMetric.html @@ -0,0 +1,161 @@ + + +Cupti: CUpti_ActivityInstantaneousMetric Struct Reference + + + + + +
+

CUpti_ActivityInstantaneousMetric Struct Reference
+ +[CUPTI Activity API] +

The activity record for an instantaneous CUPTI metric. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t deviceId
uint8_t flags
CUpti_MetricID id
CUpti_ActivityKind kind
uint8_t pad [3]
uint64_t timestamp
CUpti_MetricValue value
+


Detailed Description

+This activity record represents the collection of a CUPTI metric value (CUPTI_ACTIVITY_KIND_METRIC) at a particular instant. This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks built on top of CUPTI that collect metric data may choose to use this type to store the collected metric data.

Field Documentation

+ +
+ +
+ +

+The device id +

+

+ +

+ +
+ +

+The properties of this metric.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The metric ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The timestamp at which metric is sampled +

+

+ +

+ +
+ +

+The metric value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityInstantaneousMetricInstance.html b/doc/Cupti/structCUpti__ActivityInstantaneousMetricInstance.html new file mode 100644 index 0000000000000000000000000000000000000000..b80390c1b801f631a62beb55228a1a956a08fb8b --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityInstantaneousMetricInstance.html @@ -0,0 +1,178 @@ + + +Cupti: CUpti_ActivityInstantaneousMetricInstance Struct Reference + + + + + +
+

CUpti_ActivityInstantaneousMetricInstance Struct Reference
+ +[CUPTI Activity API] +

The instantaneous activity record for a CUPTI metric with instance information. +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t deviceId
uint8_t flags
CUpti_MetricID id
uint8_t instance
CUpti_ActivityKind kind
uint8_t pad [2]
uint64_t timestamp
CUpti_MetricValue value
+


Detailed Description

+This activity record represents a CUPTI metric value for a specific metric domain instance (CUPTI_ACTIVITY_KIND_METRIC_INSTANCE) sampled at a particular time. This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profiler frameworks built on top of CUPTI that collect metric data may choose to use this type to store the collected metric data. This activity record should be used when metric domain instance information needs to be associated with the metric.

Field Documentation

+ +
+ +
+ +

+The device id +

+

+ +

+ +
+ +

+The properties of this metric.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The metric ID. +

+

+ +

+ +
+ +

+The metric domain instance +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The timestamp at which the metric is sampled. +

+

+ +

+ +
+ +

+The metric value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityInstructionCorrelation.html b/doc/Cupti/structCUpti__ActivityInstructionCorrelation.html new file mode 100644 index 0000000000000000000000000000000000000000..60905ff9b6a3d81f8051c33857fd155a6f3e0d8e --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityInstructionCorrelation.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityInstructionCorrelation Struct Reference + + + + + +
+

CUpti_ActivityInstructionCorrelation Struct Reference
+ +[CUPTI Activity API] +

The activity record for source-level sass/source line-by-line correlation. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint32_t pad
uint32_t pcOffset
uint32_t sourceLocatorId
+


Detailed Description

+This activity record contains source-level SASS/source correlation information (CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION).

Field Documentation

+ +
+ +
+ +

+The properties of this instruction. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The pc offset for the instruction. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityInstructionExecution.html b/doc/Cupti/structCUpti__ActivityInstructionExecution.html new file mode 100644 index 0000000000000000000000000000000000000000..d6ca4177e65989e53e45244dd630a4eb774016fa --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityInstructionExecution.html @@ -0,0 +1,211 @@ + + +Cupti: CUpti_ActivityInstructionExecution Struct Reference + + + + + +
+

CUpti_ActivityInstructionExecution Struct Reference
+ +[CUPTI Activity API] +

The activity record for source-level instruction execution. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t executed
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint64_t notPredOffThreadsExecuted
uint32_t pad
uint32_t pcOffset
uint32_t sourceLocatorId
uint64_t threadsExecuted
+


Detailed Description

+This activity record contains the result of source-level instruction execution (CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION).

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The number of times this instruction was executed per warp. It will be incremented regardless of predicate or condition code. +

+

+ +

+ +
+ +

+The properties of this instruction execution. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION. +

+

+ +

+ +
+ +

+Each time this instruction is executed, this value is incremented by the number of threads that executed the instruction with the predicate and condition code evaluating to true. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The pc offset for the instruction. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+Each time this instruction is executed, this value is incremented by the number of threads that executed the instruction, regardless of predicate or condition code. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityJit.html b/doc/Cupti/structCUpti__ActivityJit.html new file mode 100644 index 0000000000000000000000000000000000000000..32e2f4ab822f27a951df4435832b98da89ed5340 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityJit.html @@ -0,0 +1,227 @@ + + +Cupti: CUpti_ActivityJit Struct Reference + + + + + +
+

CUpti_ActivityJit Struct Reference
+ +[CUPTI Activity API] +

The activity record for JIT operations. This activity represents the JIT operations (compile, load, store) of a CUmodule from the Compute Cache. Gives the exact hashed path of where the cached module is loaded from, or where the module will be stored after Just-In-Time (JIT) compilation. + +

+ + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

const char * cachePath
uint64_t cacheSize
uint32_t correlationId
uint32_t deviceId
uint64_t end
CUpti_ActivityJitEntryType jitEntryType
uint64_t jitOperationCorrelationId
CUpti_ActivityJitOperationType jitOperationType
CUpti_ActivityKind kind
uint32_t padding
uint64_t start
+


Field Documentation

+ +
+
+ + + + +
const char* CUpti_ActivityJit::cachePath
+
+
+ +

+The path where the fat binary is cached. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityJit::cacheSize
+
+
+ +

+The size of compute cache. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityJit::correlationId
+
+
+ +

+The correlation ID of the JIT operation to which the records belong. Each JIT operation is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the JIT operation. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityJit::deviceId
+
+
+ +

+The device ID. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityJit::end
+
+
+ +

+The end timestamp for the JIT operation, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the JIT operation. +

+

+ +

+ +
+ +

+The JIT entry type. +

+

+ +

+ +
+ +

+The correlation ID to correlate JIT compilation, load and store operations. Each JIT compilation unit is assigned a unique correlation ID at the time of the JIT compilation. This correlation id can be used to find the matching JIT cache load/store records. +

+

+ +

+ +
+ +

+The JIT operation type. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_JIT. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityJit::padding
+
+
+ +

+Internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityJit::start
+
+
+ +

+The start timestamp for the JIT operation, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the JIT operation. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel.html b/doc/Cupti/structCUpti__ActivityKernel.html new file mode 100644 index 0000000000000000000000000000000000000000..efab5eefcf12f66286979a6a6aa38728815ad533 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel.html @@ -0,0 +1,449 @@ + + +Cupti: CUpti_ActivityKernel Struct Reference + + + + + +
+

CUpti_ActivityKernel Struct Reference
+ +[CUPTI Activity API] +

The activity record for kernel. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
uint8_t cacheConfigExecuted
uint8_t cacheConfigRequested
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
int32_t gridX
int32_t gridY
int32_t gridZ
CUpti_ActivityKind kind
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
uint32_t pad
uint16_t registersPerThread
void * reserved0
uint32_t runtimeCorrelationId
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+ +
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The runtime correlation ID of the kernel. Each kernel execution is assigned a unique runtime correlation ID that is identical to the correlation ID in the runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel2.html b/doc/Cupti/structCUpti__ActivityKernel2.html new file mode 100644 index 0000000000000000000000000000000000000000..8fe7218fe00ad96129b755b0be7d23b320b63ac4 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel2.html @@ -0,0 +1,466 @@ + + +Cupti: CUpti_ActivityKernel2 Struct Reference + + + + + +
+

CUpti_ActivityKernel2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for kernel. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
CUpti_ActivityKind kind
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel2::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel2::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel2::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel2::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel2::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel2::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel2::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel2::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel2::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel2::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel2::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel2::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel2::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel2::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel2::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel3.html b/doc/Cupti/structCUpti__ActivityKernel3.html new file mode 100644 index 0000000000000000000000000000000000000000..7f18b0d57388ad97d63f85d93beccafd52b890de --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel3.html @@ -0,0 +1,500 @@ + + +Cupti: CUpti_ActivityKernel3 Struct Reference + + + + + +
+

CUpti_ActivityKernel3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for a kernel (CUDA 6.5(with sm_52 support) onwards). (deprecated in CUDA 9.0). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
CUpti_ActivityKind kind
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL). Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel3::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel3::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel3::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel3::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel3::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel3::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel3::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel3::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel3::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel3::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel3::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel3::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel3::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+ +
+ +

+The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy requirement of the launch cannot support caching. +

+

+ +

+ +
+ +

+The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel3::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel3::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel4.html b/doc/Cupti/structCUpti__ActivityKernel4.html new file mode 100644 index 0000000000000000000000000000000000000000..2724a6deda9fa61a6cbce68a6d74ada21c4131b7 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel4.html @@ -0,0 +1,640 @@ + + +Cupti: CUpti_ActivityKernel4 Struct Reference + + + + + +
+

CUpti_ActivityKernel4 Struct Reference
+ +[CUPTI Activity API] +

The activity record for a kernel (CUDA 9.0(with sm_70 support) onwards). (deprecated in CUDA 11.0). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
union {
cacheConfig
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
uint8_t isSharedMemoryCarveoutRequested
CUpti_ActivityKind kind
uint8_t launchType
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
uint8_t padding
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
uint64_t queued
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryCarveoutRequested
uint8_t sharedMemoryConfig
uint32_t sharedMemoryExecuted
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint64_t submitted
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL). Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel4::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel4::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel4::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityKernel4::cacheConfig
+
+
+ +

+For devices with compute capability 7.0+, cacheConfig values are not updated if the field isSharedMemoryCarveoutRequested is set. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel4::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel4::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel4::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel4::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel4::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel4::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel4::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel4::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel4::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

See also:
CUpti_ActivityLaunchType
+ +
+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel4::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel4::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy requirement of the launch cannot support caching. +

+

+ +

+ +
+ +

+The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel4::queued
+
+
+ +

+The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection.

+The command buffer is a buffer written by the CUDA driver to send commands such as kernel launches and memory copies to the GPU. All launches of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command buffer, then returns without checking the GPU's progress. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only if field isSharedMemoryCarveoutRequested is set. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Shared memory size set by the driver. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel4::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel4::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel4::submitted
+
+
+ +

+The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel5.html b/doc/Cupti/structCUpti__ActivityKernel5.html new file mode 100644 index 0000000000000000000000000000000000000000..c0f1e9f8f49e1552f2ce691af8090bbf8d44310b --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel5.html @@ -0,0 +1,690 @@ + + +Cupti: CUpti_ActivityKernel5 Struct Reference + + + + + +
+

CUpti_ActivityKernel5 Struct Reference
+ +[CUPTI Activity API] +

The activity record for a kernel (CUDA 11.0(with sm_80 support) onwards). (deprecated in CUDA 11.2) This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record. + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
union {
cacheConfig
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
uint32_t graphId
uint64_t graphNodeId
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
uint8_t isSharedMemoryCarveoutRequested
CUpti_ActivityKind kind
uint8_t launchType
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
uint8_t padding
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
uint64_t queued
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryCarveoutRequested
uint8_t sharedMemoryConfig
uint32_t sharedMemoryExecuted
CUpti_FuncShmemLimitConfig shmemLimitConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint64_t submitted
uint8_t executed:4
uint8_t requested:4
+


Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel5::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel5::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel5::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityKernel5::cacheConfig
+
+
+ +

+For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested is set +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel5::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel5::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel5::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel5::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel5::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel5::graphId
+
+
+ +

+The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+ +
+ +

+The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel5::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel5::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel5::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel5::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

See also:
CUpti_ActivityLaunchType
+ +
+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel5::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel5::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy requirement of the launch cannot support caching. +

+

+ +

+ +
+ +

+The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel5::queued
+
+
+ +

+The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection.

+The command buffer is a buffer written by the CUDA driver to send commands such as kernel launches and memory copies to the GPU. All launches of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command buffer, then returns without checking the GPU's progress. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only if field isSharedMemoryCarveoutRequested is set. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Shared memory size set by the driver. +

+

+ +

+ +
+ +

+The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic shared memory. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel5::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel5::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel5::submitted
+
+
+ +

+The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel6.html b/doc/Cupti/structCUpti__ActivityKernel6.html new file mode 100644 index 0000000000000000000000000000000000000000..ca64674d95391d186e2a118e78c9aafbada7f021 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel6.html @@ -0,0 +1,708 @@ + + +Cupti: CUpti_ActivityKernel6 Struct Reference + + + + + +
+

CUpti_ActivityKernel6 Struct Reference
+ +[CUPTI Activity API] +

The activity record for kernel. (deprecated in CUDA 11.6). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
union {
cacheConfig
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
uint32_t graphId
uint64_t graphNodeId
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
uint8_t isSharedMemoryCarveoutRequested
CUpti_ActivityKind kind
uint8_t launchType
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
CUaccessPolicyWindow * pAccessPolicyWindow
uint8_t padding
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
uint64_t queued
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryCarveoutRequested
uint8_t sharedMemoryConfig
uint32_t sharedMemoryExecuted
CUpti_FuncShmemLimitConfig shmemLimitConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint64_t submitted
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel6::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel6::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel6::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityKernel6::cacheConfig
+
+
+ +

+For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested is set +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel6::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel6::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel6::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel6::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel6::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel6::graphId
+
+
+ +

+The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+ +
+ +

+The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel6::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel6::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel6::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel6::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

See also:
CUpti_ActivityLaunchType
+ +
+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel6::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+
+ + + + +
CUaccessPolicyWindow* CUpti_ActivityKernel6::pAccessPolicyWindow
+
+
+ +

+The pointer to the access policy window. The structure CUaccessPolicyWindow is defined in cuda.h. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel6::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy requirement of the launch cannot support caching. +

+

+ +

+ +
+ +

+The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel6::queued
+
+
+ +

+The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection.

+The command buffer is a buffer written by the CUDA driver to send commands such as kernel launches and memory copies to the GPU. All launches of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command buffer, then returns without checking the GPU's progress. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only if field isSharedMemoryCarveoutRequested is set. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Shared memory size set by the driver. +

+

+ +

+ +
+ +

+The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic shared memory. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel6::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel6::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel6::submitted
+
+
+ +

+The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel7.html b/doc/Cupti/structCUpti__ActivityKernel7.html new file mode 100644 index 0000000000000000000000000000000000000000..43881d2d4f6e020dcce479a27f5dd17f869cf76b --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel7.html @@ -0,0 +1,742 @@ + + +Cupti: CUpti_ActivityKernel7 Struct Reference + + + + + +
+

CUpti_ActivityKernel7 Struct Reference
+ +[CUPTI Activity API] +

The activity record for kernel. (deprecated in CUDA 11.8). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
union {
cacheConfig
uint32_t channelID
CUpti_ChannelType channelType
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
uint32_t graphId
uint64_t graphNodeId
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
uint8_t isSharedMemoryCarveoutRequested
CUpti_ActivityKind kind
uint8_t launchType
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
const char * name
CUaccessPolicyWindow * pAccessPolicyWindow
uint8_t padding
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
uint64_t queued
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryCarveoutRequested
uint8_t sharedMemoryConfig
uint32_t sharedMemoryExecuted
CUpti_FuncShmemLimitConfig shmemLimitConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint64_t submitted
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) but is no longer generated by CUPTI. Kernel activities are now reported using the CUpti_ActivityKernel8 activity record.

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel7::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel7::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel7::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityKernel7::cacheConfig
+
+
+ +

+For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested is set +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel7::channelID
+
+
+ +

+The ID of the HW channel on which the kernel is launched. +

+

+ +

+
+ + + + +
CUpti_ChannelType CUpti_ActivityKernel7::channelType
+
+
+ +

+The type of the channel +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel7::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel7::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel7::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel7::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel7::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel7::graphId
+
+
+ +

+The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+ +
+ +

+The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel7::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel7::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel7::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel7::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

See also:
CUpti_ActivityLaunchType
+ +
+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel7::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+
+ + + + +
CUaccessPolicyWindow* CUpti_ActivityKernel7::pAccessPolicyWindow
+
+
+ +

+The pointer to the access policy window. The structure CUaccessPolicyWindow is defined in cuda.h. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel7::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy requirement of the launch cannot support caching. +

+

+ +

+ +
+ +

+The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel7::queued
+
+
+ +

+The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection.

+The command buffer is a buffer written by the CUDA driver to send commands such as kernel launches and memory copies to the GPU. All launches of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command buffer, then returns without checking the GPU's progress. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only if field isSharedMemoryCarveoutRequested is set. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Shared memory size set by the driver. +

+

+ +

+ +
+ +

+The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic shared memory. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel7::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel7::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel7::submitted
+
+
+ +

+The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityKernel8.html b/doc/Cupti/structCUpti__ActivityKernel8.html new file mode 100644 index 0000000000000000000000000000000000000000..2eb73e922a44f11c3f9c3595546248fac92c279c --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityKernel8.html @@ -0,0 +1,827 @@ + + +Cupti: CUpti_ActivityKernel8 Struct Reference + + + + + +
+

CUpti_ActivityKernel8 Struct Reference
+ +[CUPTI Activity API] +

The activity record for kernel. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

int32_t blockX
int32_t blockY
int32_t blockZ
union {
cacheConfig
uint32_t channelID
CUpti_ChannelType channelType
uint32_t clusterSchedulingPolicy
uint32_t clusterX
uint32_t clusterY
uint32_t clusterZ
uint64_t completed
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
int32_t dynamicSharedMemory
uint64_t end
uint32_t graphId
uint64_t graphNodeId
int64_t gridId
int32_t gridX
int32_t gridY
int32_t gridZ
uint8_t isSharedMemoryCarveoutRequested
CUpti_ActivityKind kind
uint8_t launchType
uint32_t localMemoryPerThread
uint32_t localMemoryTotal
uint64_t localMemoryTotal_v2
const char * name
CUaccessPolicyWindow * pAccessPolicyWindow
uint8_t padding
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheExecuted
CUpti_ActivityPartitionedGlobalCacheConfig partitionedGlobalCacheRequested
uint64_t queued
uint16_t registersPerThread
void * reserved0
uint8_t sharedMemoryCarveoutRequested
uint8_t sharedMemoryConfig
uint32_t sharedMemoryExecuted
CUpti_FuncShmemLimitConfig shmemLimitConfig
uint64_t start
int32_t staticSharedMemory
uint32_t streamId
uint64_t submitted
uint8_t executed:4
uint8_t requested:4
+


Detailed Description

+This activity record represents a kernel execution (CUPTI_ACTIVITY_KIND_KERNEL and CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)

Field Documentation

+ +
+
+ + + + +
int32_t CUpti_ActivityKernel8::blockX
+
+
+ +

+The X-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel8::blockY
+
+
+ +

+The Y-dimension block size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel8::blockZ
+
+
+ +

+The Z-dimension block size for the kernel. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityKernel8::cacheConfig
+
+
+ +

+For devices with compute capability 7.0+ cacheConfig values are not updated in case field isSharedMemoryCarveoutRequested is set +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::channelID
+
+
+ +

+The ID of the HW channel on which the kernel is launched. +

+

+ +

+
+ + + + +
CUpti_ChannelType CUpti_ActivityKernel8::channelType
+
+
+ +

+The type of the channel +

+

+ +

+ +
+ +

+The cluster scheduling policy for the kernel. Refer to CUclusterSchedulingPolicy. This field is valid for devices with compute capability 9.0 and higher. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::clusterX
+
+
+ +

+The X-dimension cluster size for the kernel. Field is valid for devices with compute capability 9.0 and higher +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::clusterY
+
+
+ +

+The Y-dimension cluster size for the kernel. Field is valid for devices with compute capability 9.0 and higher +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::clusterZ
+
+
+ +

+The Z-dimension cluster size for the kernel. Field is valid for devices with compute capability 9.0 and higher +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel8::completed
+
+
+ +

+The completed timestamp for the kernel execution, in ns. It represents the completion of all its child kernels and the kernel itself. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the completion time is unknown. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::contextId
+
+
+ +

+The ID of the context where the kernel is executing. +

+

+ +

+ +
+ +

+The correlation ID of the kernel. Each kernel execution is assigned a unique correlation ID that is identical to the correlation ID in the driver or runtime API activity record that launched the kernel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::deviceId
+
+
+ +

+The ID of the device where the kernel is executing. +

+

+ +

+ +
+ +

+The dynamic shared memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel8::end
+
+
+ +

+The end timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel8::executed
+
+
+ +

+The cache configuration used for the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::graphId
+
+
+ +

+The unique ID of the graph that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+ +
+ +

+The unique ID of the graph node that launched this kernel through graph launch APIs. This field will be 0 if the kernel is not launched through graph launch APIs. +

+

+ +

+
+ + + + +
int64_t CUpti_ActivityKernel8::gridId
+
+
+ +

+The grid ID of the kernel. Each kernel is assigned a unique grid ID at runtime. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel8::gridX
+
+
+ +

+The X-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel8::gridY
+
+
+ +

+The Y-dimension grid size for the kernel. +

+

+ +

+
+ + + + +
int32_t CUpti_ActivityKernel8::gridZ
+
+
+ +

+The Z-dimension grid size for the kernel. +

+

+ +

+ +
+ +

+This indicates if CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT was updated for the kernel launch +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_KERNEL or CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL. +

+

+ +

+ +
+ +

+This indicates whether the kernel was executed via a regular launch or via a single/multi device cooperative launch.

See also:
CUpti_ActivityLaunchType
+ +
+

+ +

+ +
+ +

+The amount of local memory reserved for each thread, in bytes. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes (deprecated in CUDA 11.8). Refer to the field localMemoryTotal_v2 instead. +

+

+ +

+ +
+ +

+The total amount of local memory reserved for the kernel, in bytes. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityKernel8::name
+
+
+ +

+The name of the kernel. This name is shared across all activity records representing the same kernel, and so should not be modified. +

+

+ +

+
+ + + + +
CUaccessPolicyWindow* CUpti_ActivityKernel8::pAccessPolicyWindow
+
+
+ +

+The pointer to the access policy window. The structure CUaccessPolicyWindow is defined in cuda.h. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityKernel8::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The partitioned global caching executed for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. Partitioned global caching can be automatically disabled if the occupancy requirement of the launch cannot support caching. +

+

+ +

+ +
+ +

+The partitioned global caching requested for the kernel. Partitioned global caching is required to enable caching on certain chips, such as devices with compute capability 5.2. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel8::queued
+
+
+ +

+The timestamp when the kernel is queued up in the command buffer, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the queued time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection.

+Command buffer is a buffer written by CUDA driver to send commands like kernel launch, memory copy, etc. to the GPU. All launches of CUDA kernels are asynchronous with respect to the host: the host requests the launch by writing commands into the command buffer, then returns without checking the GPU's progress. +

+

+ +

+ +
+ +

+The number of registers required for each thread executing the kernel. +

+

+ +

+ +
+ +

+The cache configuration requested by the kernel. The value is one of the CUfunc_cache enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Shared memory carveout value requested for the function in percentage of the total resource. The value will be updated only if field isSharedMemoryCarveoutRequested is set. +

+

+ +

+ +
+ +

+The shared memory configuration used for the kernel. The value is one of the CUsharedconfig enumeration values from cuda.h. +

+

+ +

+ +
+ +

+Shared memory size set by the driver. +

+

+ +

+ +
+ +

+The shared memory limit config for the kernel. This field shows whether user has opted for a higher per block limit of dynamic shared memory. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel8::start
+
+
+ +

+The start timestamp for the kernel execution, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the kernel. +

+

+ +

+ +
+ +

+The static shared memory allocated for the kernel, in bytes. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityKernel8::streamId
+
+
+ +

+The ID of the stream where the kernel is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityKernel8::submitted
+
+
+ +

+The timestamp when the command buffer containing the kernel launch is submitted to the GPU, in ns. A value of CUPTI_TIMESTAMP_UNKNOWN indicates that the submitted time could not be collected for the kernel. This timestamp is not collected by default. Use API cuptiActivityEnableLatencyTimestamps() to enable collection. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMarker.html b/doc/Cupti/structCUpti__ActivityMarker.html new file mode 100644 index 0000000000000000000000000000000000000000..b16cbaffce186243c83fc3da7c075479844f8c59 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMarker.html @@ -0,0 +1,161 @@ + + +Cupti: CUpti_ActivityMarker Struct Reference + + + + + +
+

CUpti_ActivityMarker Struct Reference
+ +[CUPTI Activity API] +

The activity record providing a marker which is an instantaneous point in time. (deprecated in CUDA 8.0). +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

CUpti_ActivityFlag flags
uint32_t id
CUpti_ActivityKind kind
const char * name
CUpti_ActivityObjectKindId objectId
CUpti_ActivityObjectKind objectKind
uint64_t timestamp
+


Detailed Description

+The marker is specified with a descriptive name and unique id (CUPTI_ACTIVITY_KIND_MARKER). Marker activity is now reported using the CUpti_ActivityMarker2 activity record.

Field Documentation

+ +
+ +
+ +

+The flags associated with the marker.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMarker::id
+
+
+ +

+The marker ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MARKER. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityMarker::name
+
+
+ +

+The marker name for an instantaneous or start marker. This will be NULL for an end marker. +

+

+ +

+ +
+ +

+The identifier for the activity object associated with this marker. 'objectKind' indicates which ID is valid for this record. +

+

+ +

+ +
+ +

+The kind of activity object associated with this marker. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMarker::timestamp
+
+
+ +

+The timestamp for the marker, in ns. A value of 0 indicates that timestamp information could not be collected for the marker. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMarker2.html b/doc/Cupti/structCUpti__ActivityMarker2.html new file mode 100644 index 0000000000000000000000000000000000000000..04e9f5dedfc0fad1dc428c8d79ee96918778f29f --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMarker2.html @@ -0,0 +1,195 @@ + + +Cupti: CUpti_ActivityMarker2 Struct Reference + + + + + +
+

CUpti_ActivityMarker2 Struct Reference
+ +[CUPTI Activity API] +

The activity record providing a marker which is an instantaneous point in time. +More... +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

const char * domain
CUpti_ActivityFlag flags
uint32_t id
CUpti_ActivityKind kind
const char * name
CUpti_ActivityObjectKindId objectId
CUpti_ActivityObjectKind objectKind
uint32_t pad
uint64_t timestamp
+


Detailed Description

+The marker is specified with a descriptive name and unique id (CUPTI_ACTIVITY_KIND_MARKER).

Field Documentation

+ +
+
+ + + + +
const char* CUpti_ActivityMarker2::domain
+
+
+ +

+The name of the domain to which this marker belongs. This will be NULL for the default domain. +

+

+ +

+ +
+ +

+The flags associated with the marker.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMarker2::id
+
+
+ +

+The marker ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MARKER. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityMarker2::name
+
+
+ +

+The marker name for an instantaneous or start marker. This will be NULL for an end marker. +

+

+ +

+ +
+ +

+The identifier for the activity object associated with this marker. 'objectKind' indicates which ID is valid for this record. +

+

+ +

+ +
+ +

+The kind of activity object associated with this marker. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMarker2::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMarker2::timestamp
+
+
+ +

+The timestamp for the marker, in ns. A value of 0 indicates that timestamp information could not be collected for the marker. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMarkerData.html b/doc/Cupti/structCUpti__ActivityMarkerData.html new file mode 100644 index 0000000000000000000000000000000000000000..c0629f77b9f4ac15f5bebb1e4f078f3181ed1e13 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMarkerData.html @@ -0,0 +1,161 @@ + + +Cupti: CUpti_ActivityMarkerData Struct Reference + + + + + +
+

CUpti_ActivityMarkerData Struct Reference
+ +[CUPTI Activity API] +

The activity record providing detailed information for a marker. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t category
uint32_t color
CUpti_ActivityFlag flags
uint32_t id
CUpti_ActivityKind kind
CUpti_MetricValue payload
CUpti_MetricValueKind payloadKind
+


Detailed Description

+The marker data contains color, payload, and category. (CUPTI_ACTIVITY_KIND_MARKER_DATA).

Field Documentation

+ +
+ +
+ +

+The category for the marker. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMarkerData::color
+
+
+ +

+The color for the marker. +

+

+ +

+ +
+ +

+The flags associated with the marker.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMarkerData::id
+
+
+ +

+The marker ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MARKER_DATA. +

+

+ +

+ +
+ +

+The payload value. +

+

+ +

+ +
+ +

+Defines the payload format for the value associated with the marker. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpy.html b/doc/Cupti/structCUpti__ActivityMemcpy.html new file mode 100644 index 0000000000000000000000000000000000000000..d651afb165edc968d3a266304de27279c5131f47 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpy.html @@ -0,0 +1,283 @@ + + +Cupti: CUpti_ActivityMemcpy Struct Reference + + + + + +
+

CUpti_ActivityMemcpy Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory copies. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint8_t dstKind
uint64_t end
uint8_t flags
CUpti_ActivityKind kind
void * reserved0
uint32_t runtimeCorrelationId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpy::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy::contextId
+
+
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy::copyKind
+
+
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy::deviceId
+
+
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy::dstKind
+
+
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy::flags
+
+
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical to the correlation ID in the runtime API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy::srcKind
+
+
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy::streamId
+
+
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpy3.html b/doc/Cupti/structCUpti__ActivityMemcpy3.html new file mode 100644 index 0000000000000000000000000000000000000000..5cef0bc180f81a6d5a5913f68653d01df30ae553 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpy3.html @@ -0,0 +1,300 @@ + + +Cupti: CUpti_ActivityMemcpy3 Struct Reference + + + + + +
+

CUpti_ActivityMemcpy3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory copies. (deprecated in CUDA 11.1). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint8_t dstKind
uint64_t end
uint8_t flags
uint64_t graphNodeId
CUpti_ActivityKind kind
void * reserved0
uint32_t runtimeCorrelationId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpy3::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy3::contextId
+
+
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy3::copyKind
+
+
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy3::deviceId
+
+
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy3::dstKind
+
+
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy3::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy3::flags
+
+
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The unique ID of the graph node that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical to the correlation ID in the runtime API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy3::srcKind
+
+
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy3::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy3::streamId
+
+
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpy4.html b/doc/Cupti/structCUpti__ActivityMemcpy4.html new file mode 100644 index 0000000000000000000000000000000000000000..e186faefcb0b9f70c2e526655845b0e055ca06d5 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpy4.html @@ -0,0 +1,334 @@ + + +Cupti: CUpti_ActivityMemcpy4 Struct Reference + + + + + +
+

CUpti_ActivityMemcpy4 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory copies. (deprecated in CUDA 11.6). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint8_t dstKind
uint64_t end
uint8_t flags
uint32_t graphId
uint64_t graphNodeId
CUpti_ActivityKind kind
uint32_t padding
void * reserved0
uint32_t runtimeCorrelationId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpy4::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy4::contextId
+
+
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy4::copyKind
+
+
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy4::deviceId
+
+
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy4::dstKind
+
+
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy4::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy4::flags
+
+
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy4::graphId
+
+
+ +

+The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The unique ID of the graph node that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy4::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical to the correlation ID in the runtime API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy4::srcKind
+
+
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy4::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy4::streamId
+
+
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpy5.html b/doc/Cupti/structCUpti__ActivityMemcpy5.html new file mode 100644 index 0000000000000000000000000000000000000000..2232951188a1574af0da3484c89b32fec79669f0 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpy5.html @@ -0,0 +1,368 @@ + + +Cupti: CUpti_ActivityMemcpy5 Struct Reference + + + + + +
+

CUpti_ActivityMemcpy5 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory copies. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t channelID
CUpti_ChannelType channelType
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint8_t dstKind
uint64_t end
uint8_t flags
uint32_t graphId
uint64_t graphNodeId
CUpti_ActivityKind kind
uint32_t pad2
void * reserved0
uint32_t runtimeCorrelationId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a memory copy (CUPTI_ACTIVITY_KIND_MEMCPY).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpy5::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy5::channelID
+
+
+ +

+The ID of the HW channel on which the memory copy is occurring. +

+

+ +

+
+ + + + +
CUpti_ChannelType CUpti_ActivityMemcpy5::channelType
+
+
+ +

+The type of the channel. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy5::contextId
+
+
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy5::copyKind
+
+
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy5::deviceId
+
+
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy5::dstKind
+
+
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy5::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy5::flags
+
+
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy5::graphId
+
+
+ +

+The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The unique ID of the graph node that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy5::pad2
+
+
+ +

+Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The runtime correlation ID of the memory copy. Each memory copy is assigned a unique runtime correlation ID that is identical to the correlation ID in the runtime API activity record that launched the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpy5::srcKind
+
+
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpy5::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpy5::streamId
+
+
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpyPtoP.html b/doc/Cupti/structCUpti__ActivityMemcpyPtoP.html new file mode 100644 index 0000000000000000000000000000000000000000..ce3810cecd1c9220d4248ef1bd3646f82bc88376 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpyPtoP.html @@ -0,0 +1,351 @@ + + +Cupti: CUpti_ActivityMemcpyPtoP Struct Reference + + + + + +
+

CUpti_ActivityMemcpyPtoP Struct Reference
+ +[CUPTI Activity API] +

The activity record for peer-to-peer memory copies. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint32_t dstContextId
uint32_t dstDeviceId
uint8_t dstKind
uint64_t end
uint8_t flags
CUpti_ActivityKind kind
uint32_t pad
void * reserved0
uint32_t srcContextId
uint32_t srcDeviceId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2) but is no longer generated by CUPTI. Peer-to-peer memory copy activities are now reported using the CUpti_ActivityMemcpyPtoP2 activity record.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+ +
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+ +
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory copy. +

+

+ +

+ +
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied to. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied to. +

+

+ +

+ +
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMemcpyPtoP::flags
+
+
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpyPtoP::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied from. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied from. +

+

+ +

+ +
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpyPtoP2.html b/doc/Cupti/structCUpti__ActivityMemcpyPtoP2.html new file mode 100644 index 0000000000000000000000000000000000000000..af505f8f7806c29de83a7b5c004b7c53d8a35ed9 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpyPtoP2.html @@ -0,0 +1,368 @@ + + +Cupti: CUpti_ActivityMemcpyPtoP2 Struct Reference + + + + + +
+

CUpti_ActivityMemcpyPtoP2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for peer-to-peer memory copies. (deprecated in CUDA 11.1). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint32_t dstContextId
uint32_t dstDeviceId
uint8_t dstKind
uint64_t end
uint8_t flags
uint64_t graphNodeId
CUpti_ActivityKind kind
uint32_t pad
void * reserved0
uint32_t srcContextId
uint32_t srcDeviceId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP2::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+ +
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+ +
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory copy. +

+

+ +

+ +
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied to. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied to. +

+

+ +

+ +
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP2::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The unique ID of the graph node that executed the memcpy through graph launch. This field will be 0 if memcpy is not done using graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpyPtoP2::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied from. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied from. +

+

+ +

+ +
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP2::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpyPtoP3.html b/doc/Cupti/structCUpti__ActivityMemcpyPtoP3.html new file mode 100644 index 0000000000000000000000000000000000000000..41a9f0930dca9a7f337b7468d9999aa3fe92a425 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpyPtoP3.html @@ -0,0 +1,402 @@ + + +Cupti: CUpti_ActivityMemcpyPtoP3 Struct Reference + + + + + +
+

CUpti_ActivityMemcpyPtoP3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for peer-to-peer memory copies. (deprecated in CUDA 11.6). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint32_t dstContextId
uint32_t dstDeviceId
uint8_t dstKind
uint64_t end
uint8_t flags
uint32_t graphId
uint64_t graphNodeId
CUpti_ActivityKind kind
uint32_t pad
uint32_t padding
void * reserved0
uint32_t srcContextId
uint32_t srcDeviceId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP3::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+ +
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+ +
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory copy. +

+

+ +

+ +
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied to. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied to. +

+

+ +

+ +
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP3::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The unique ID of the graph node that executed the memcpy through graph launch. This field will be 0 if memcpy is not done using graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpyPtoP3::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied from. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied from. +

+

+ +

+ +
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP3::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemcpyPtoP4.html b/doc/Cupti/structCUpti__ActivityMemcpyPtoP4.html new file mode 100644 index 0000000000000000000000000000000000000000..9eb87e920bfbd823c19aeeb3d4fcc6c0f015321c --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemcpyPtoP4.html @@ -0,0 +1,419 @@ + + +Cupti: CUpti_ActivityMemcpyPtoP4 Struct Reference + + + + + +
+

CUpti_ActivityMemcpyPtoP4 Struct Reference
+ +[CUPTI Activity API] +

The activity record for peer-to-peer memory copies. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t channelID
CUpti_ChannelType channelType
uint32_t contextId
uint8_t copyKind
uint32_t correlationId
uint32_t deviceId
uint32_t dstContextId
uint32_t dstDeviceId
uint8_t dstKind
uint64_t end
uint8_t flags
uint32_t graphId
uint64_t graphNodeId
CUpti_ActivityKind kind
uint32_t pad
void * reserved0
uint32_t srcContextId
uint32_t srcDeviceId
uint8_t srcKind
uint64_t start
uint32_t streamId
+


Detailed Description

+This activity record represents a peer-to-peer memory copy (CUPTI_ACTIVITY_KIND_MEMCPY2).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP4::bytes
+
+
+ +

+The number of bytes transferred by the memory copy. +

+

+ +

+ +
+ +

+The ID of the HW channel on which the memory copy is occurring. +

+

+ +

+
+ + + + +
CUpti_ChannelType CUpti_ActivityMemcpyPtoP4::channelType
+
+
+ +

+The type of the channel +

+

+ +

+ +
+ +

+The ID of the context where the memory copy is occurring. +

+

+ +

+ +
+ +

+The kind of the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemcpyKind
+ +
+

+ +

+ +
+ +

+The correlation ID of the memory copy. Each memory copy is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory copy. +

+

+ +

+ +
+ +

+The ID of the device where the memory copy is occurring. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied to. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied to. +

+

+ +

+ +
+ +

+The destination memory kind written by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP4::end
+
+
+ +

+The end timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The flags associated with the memory copy.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The unique ID of the graph that executed this memcpy through graph launch. This field will be 0 if the memcpy is not done through graph launch. +

+

+ +

+ +
+ +

+The unique ID of the graph node that executed the memcpy through graph launch. This field will be 0 if memcpy is not done using graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMCPY2. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemcpyPtoP4::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the context owning the memory being copied from. +

+

+ +

+ +
+ +

+The ID of the device where memory is being copied from. +

+

+ +

+ +
+ +

+The source memory kind read by the memory copy, stored as a byte to reduce record size.

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemcpyPtoP4::start
+
+
+ +

+The start timestamp for the memory copy, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory copy. +

+

+ +

+ +
+ +

+The ID of the stream where the memory copy is occurring. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemory.html b/doc/Cupti/structCUpti__ActivityMemory.html new file mode 100644 index 0000000000000000000000000000000000000000..070ffe91fd3e14ac7d1d8b6392137be2689f7757 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemory.html @@ -0,0 +1,246 @@ + + +Cupti: CUpti_ActivityMemory Struct Reference + + + + + +
+

CUpti_ActivityMemory Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t address
uint64_t allocPC
uint64_t bytes
uint32_t contextId
uint32_t deviceId
uint64_t end
uint64_t freePC
CUpti_ActivityKind kind
CUpti_ActivityMemoryKind memoryKind
const char * name
uint32_t processId
uint64_t start
+


Detailed Description

+This activity record represents a memory allocation and free operation (CUPTI_ACTIVITY_KIND_MEMORY). This activity record provides a single record for the memory allocation and memory release operations.

+Note: It is recommended to move to the new activity record CUpti_ActivityMemory3 enabled using the kind CUPTI_ACTIVITY_KIND_MEMORY2. CUpti_ActivityMemory3 provides separate records for memory allocation and memory release operations. This makes it possible to correlate the corresponding driver and runtime API activity record with the memory operation.


Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemory::address
+
+
+ +

+The virtual address of the allocation +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory::allocPC
+
+
+ +

+The program counter of the allocation of memory +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory::bytes
+
+
+ +

+The number of bytes of memory allocated. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory::contextId
+
+
+ +

+The ID of the context. If context is NULL, contextId is set to CUPTI_INVALID_CONTEXT_ID. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory::deviceId
+
+
+ +

+The ID of the device where the memory allocation is taking place. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory::end
+
+
+ +

+The end timestamp for the memory operation, i.e. the time when memory was freed, in ns. This will be 0 if memory is not freed in the application. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory::freePC
+
+
+ +

+The program counter of the freeing of memory. This will be 0 if memory is not freed in the application. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY +

+

+ +

+ +
+ +

+The memory kind requested by the user +

+

+ +

+
+ + + + +
const char* CUpti_ActivityMemory::name
+
+
+ +

+Variable name. This name is shared across all activity records representing the same symbol, and so should not be modified. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory::processId
+
+
+ +

+The ID of the process to which this record belongs. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory::start
+
+
+ +

+The start timestamp for the memory operation, i.e. the time when memory was allocated, in ns. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemory2.html b/doc/Cupti/structCUpti__ActivityMemory2.html new file mode 100644 index 0000000000000000000000000000000000000000..a41cb485f022fd550407380d07db0ac2478b0e6c --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemory2.html @@ -0,0 +1,370 @@ + + +Cupti: CUpti_ActivityMemory2 Struct Reference + + + + + +
+

CUpti_ActivityMemory2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t address
uint64_t bytes
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint32_t isAsync
CUpti_ActivityKind kind
CUpti_ActivityMemoryKind memoryKind
CUpti_ActivityMemoryOperationType memoryOperationType
struct {
   uint64_t   address
   CUpti_ActivityMemoryPoolType   memoryPoolType
   uint64_t   releaseThreshold
memoryPoolConfig
const char * name
uint64_t PC
uint32_t processId
uint32_t streamId
uint64_t timestamp
uint64_t processId
uint64_t size
+


Detailed Description

+This activity record represents a memory allocation and free operation (CUPTI_ACTIVITY_KIND_MEMORY2). This activity record provides separate records for memory allocation and memory release operations. This makes it possible to correlate the corresponding driver and runtime API activity record with the memory operation.

+Note: This activity record is an upgrade over CUpti_ActivityMemory enabled using the kind CUPTI_ACTIVITY_KIND_MEMORY. CUpti_ActivityMemory provides a single record for the memory allocation and memory release operations.


Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemory2::address
+
+
+ +

+The virtual address of the allocation.

+The base address of the memory pool. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory2::bytes
+
+
+ +

+The number of bytes of memory allocated. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory2::contextId
+
+
+ +

+The ID of the context. If context is NULL, contextId is set to CUPTI_INVALID_CONTEXT_ID. +

+

+ +

+ +
+ +

+The correlation ID of the memory operation. Each memory operation is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory2::deviceId
+
+
+ +

+The ID of the device where the memory operation is taking place. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory2::isAsync
+
+
+ +

+isAsync is set if memory operation happens through async memory APIs. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY2 +

+

+ +

+ +
+ +

+The memory kind requested by the user, CUpti_ActivityMemoryKind. +

+

+ +

+ +
+ +

+The memory operation requested by the user, CUpti_ActivityMemoryOperationType. +

+

+ +

+
+ + + + +
struct { ... } CUpti_ActivityMemory2::memoryPoolConfig
+
+
+ +

+The memory pool configuration used for the memory operations. +

+

+ +

+ +

+
+ + + + +
const char* CUpti_ActivityMemory2::name
+
+
+ +

+Variable name. This name is shared across all activity records representing the same symbol, and so should not be modified. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory2::PC
+
+
+ +

+The program counter of the memory operation. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory2::processId
+
+
+ +

+The processId of the memory pool. processId is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory2::processId
+
+
+ +

+The ID of the process to which this record belongs. +

+

+ +

+ +
+ +

+The release threshold of the memory pool in bytes. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory2::size
+
+
+ +

+The size of the memory pool in bytes. size is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory2::streamId
+
+
+ +

+The ID of the stream. If memory operation is not async, streamId is set to CUPTI_INVALID_STREAM_ID. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory2::timestamp
+
+
+ +

+The start timestamp for the memory operation, in ns. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemory3.html b/doc/Cupti/structCUpti__ActivityMemory3.html new file mode 100644 index 0000000000000000000000000000000000000000..768547e4c13a6d071c01e52190353658256871d7 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemory3.html @@ -0,0 +1,301 @@ + + +Cupti: CUpti_ActivityMemory3 Struct Reference + + + + + +
+

CUpti_ActivityMemory3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Structures

struct  PACKED_ALIGNMENT

Data Fields

uint64_t address
uint64_t bytes
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint32_t isAsync
CUpti_ActivityKind kind
CUpti_ActivityMemoryKind memoryKind
CUpti_ActivityMemoryOperationType memoryOperationType
struct
+CUpti_ActivityMemory3::PACKED_ALIGNMENT 
memoryPoolConfig
const char * name
uint64_t PC
uint32_t processId
uint32_t streamId
uint64_t timestamp
+


Detailed Description

+This activity record represents a memory allocation and free operation (CUPTI_ACTIVITY_KIND_MEMORY2). This activity record provides separate records for memory allocation and memory release operations. This makes it possible to correlate the corresponding driver and runtime API activity record with the memory operation.

+Note: This activity record is an upgrade over CUpti_ActivityMemory enabled using the kind CUPTI_ACTIVITY_KIND_MEMORY. CUpti_ActivityMemory provides a single record for the memory allocation and memory release operations.


Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemory3::address
+
+
+ +

+The virtual address of the allocation. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::bytes
+
+
+ +

+The number of bytes of memory allocated. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory3::contextId
+
+
+ +

+The ID of the context. If context is NULL, contextId is set to CUPTI_INVALID_CONTEXT_ID. +

+

+ +

+ +
+ +

+The correlation ID of the memory operation. Each memory operation is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory3::deviceId
+
+
+ +

+The ID of the device where the memory operation is taking place. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory3::isAsync
+
+
+ +

+isAsync is set if memory operation happens through async memory APIs. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY2 +

+

+ +

+ +
+ +

+The memory kind requested by the user, CUpti_ActivityMemoryKind. +

+

+ +

+ +
+ +

+The memory operation requested by the user, CUpti_ActivityMemoryOperationType. +

+

+ +

+ +
+ +

+The memory pool configuration used for the memory operations. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityMemory3::name
+
+
+ +

+Variable name. This name is shared across all activity records representing the same symbol, and so should not be modified. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::PC
+
+
+ +

+The program counter of the memory operation. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory3::processId
+
+
+ +

+The ID of the process to which this record belongs. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemory3::streamId
+
+
+ +

+The ID of the stream. If memory operation is not async, streamId is set to CUPTI_INVALID_STREAM_ID. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::timestamp
+
+
+ +

+The start timestamp for the memory operation, in ns. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemory3_1_1PACKED__ALIGNMENT.html b/doc/Cupti/structCUpti__ActivityMemory3_1_1PACKED__ALIGNMENT.html new file mode 100644 index 0000000000000000000000000000000000000000..85c5470a374a5108bc8b99775691f7f237812275 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemory3_1_1PACKED__ALIGNMENT.html @@ -0,0 +1,139 @@ + + +Cupti: CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT Struct Reference + + + + + +
+

CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT Struct Reference

+ + + + + + + + + + + + + + +

Data Fields

uint64_t address
CUpti_ActivityMemoryPoolType memoryPoolType
uint64_t releaseThreshold
uint64_t utilizedSize
uint64_t processId
uint64_t size
+

Detailed Description

+The memory pool configuration used for the memory operations.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT::address
+
+
+ +

+The base address of the memory pool. +

+

+ +

+
+ + + + +
CUpti_ActivityMemoryPoolType CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT::memoryPoolType
+
+
+ +

+The type of the memory pool, CUpti_ActivityMemoryPoolType +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT::processId
+
+
+ +

+The processId of the memory pool. processId is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_IMPORTED, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT::releaseThreshold
+
+
+ +

+The release threshold of the memory pool in bytes. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT::size
+
+
+ +

+The size of the memory pool in bytes. size is valid if memoryPoolType is CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemory3::CUpti_ActivityMemory3::PACKED_ALIGNMENT::utilizedSize
+
+
+ +

+The utilized size of the memory pool. utilizedSize is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemoryPool.html b/doc/Cupti/structCUpti__ActivityMemoryPool.html new file mode 100644 index 0000000000000000000000000000000000000000..5c9e3acb40ac1a82c9e5f5f9f43aa43c210aca93 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemoryPool.html @@ -0,0 +1,245 @@ + + +Cupti: CUpti_ActivityMemoryPool Struct Reference + + + + + +
+

CUpti_ActivityMemoryPool Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory pool. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t address
uint32_t correlationId
uint32_t deviceId
CUpti_ActivityKind kind
CUpti_ActivityMemoryPoolOperationType memoryPoolOperationType
CUpti_ActivityMemoryPoolType memoryPoolType
size_t minBytesToKeep
uint32_t pad
uint32_t processId
uint64_t releaseThreshold
uint64_t size
uint64_t timestamp
+


Detailed Description

+This activity record represents a memory pool creation, destruction and trimming (CUPTI_ACTIVITY_KIND_MEMORY_POOL). This activity record provides separate records for memory pool creation, destruction and trimming operations. This makes it possible to correlate the corresponding driver and runtime API activity record with the memory pool operation.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemoryPool::address
+
+
+ +

+The virtual address of the allocation. +

+

+ +

+ +
+ +

+The correlation ID of the memory pool operation. Each memory pool operation is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+

+ +

+ +
+ +

+The ID of the device where the memory pool is created. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY_POOL +

+

+ +

+ +

+ +

+ +
+ +

+The minimum bytes to keep of the memory pool. minBytesToKeep is valid for CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED, CUpti_ActivityMemoryPoolOperationType +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemoryPool::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the process to which this record belongs. +

+

+ +

+ +
+ +

+The release threshold of the memory pool. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemoryPool::size
+
+
+ +

+The size of the memory pool operation in bytes. size is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+ +
+ +

+The start timestamp for the memory operation, in ns. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemoryPool2.html b/doc/Cupti/structCUpti__ActivityMemoryPool2.html new file mode 100644 index 0000000000000000000000000000000000000000..a3cef92d7c6228f8980e43c6077b76ec8d930136 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemoryPool2.html @@ -0,0 +1,262 @@ + + +Cupti: CUpti_ActivityMemoryPool2 Struct Reference + + + + + +
+

CUpti_ActivityMemoryPool2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memory pool. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t address
uint32_t correlationId
uint32_t deviceId
CUpti_ActivityKind kind
CUpti_ActivityMemoryPoolOperationType memoryPoolOperationType
CUpti_ActivityMemoryPoolType memoryPoolType
size_t minBytesToKeep
uint32_t pad
uint32_t processId
uint64_t releaseThreshold
uint64_t size
uint64_t timestamp
uint64_t utilizedSize
+


Detailed Description

+This activity record represents a memory pool creation, destruction and trimming (CUPTI_ACTIVITY_KIND_MEMORY_POOL). This activity record provides separate records for memory pool creation, destruction and trimming operations. This makes it possible to correlate the corresponding driver and runtime API activity record with the memory pool operation.

Field Documentation

+ +
+ +
+ +

+The virtual address of the allocation. +

+

+ +

+ +
+ +

+The correlation ID of the memory pool operation. Each memory pool operation is assigned a unique correlation ID that is identical to the correlation ID in the driver and runtime API activity record that launched the memory operation. +

+

+ +

+ +
+ +

+The ID of the device where the memory pool is created. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMORY_POOL +

+

+ +

+ +

+ +

+ +
+ +

+The minimum bytes to keep of the memory pool. minBytesToKeep is valid for CUPTI_ACTIVITY_MEMORY_POOL_OPERATION_TYPE_TRIMMED, CUpti_ActivityMemoryPoolOperationType +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemoryPool2::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the process to which this record belongs. +

+

+ +

+ +
+ +

+The release threshold of the memory pool. releaseThreshold is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemoryPool2::size
+
+
+ +

+The size of the memory pool operation in bytes. size is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+ +

+ +
+ +

+The start timestamp for the memory operation, in ns. +

+

+ +

+ +
+ +

+The utilized size of the memory pool. utilizedSize is valid for CUPTI_ACTIVITY_MEMORY_POOL_TYPE_LOCAL, CUpti_ActivityMemoryPoolType. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemset.html b/doc/Cupti/structCUpti__ActivityMemset.html new file mode 100644 index 0000000000000000000000000000000000000000..57eae2f7b2e3f522b1b23a5995e0f3dfedb83149 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemset.html @@ -0,0 +1,247 @@ + + +Cupti: CUpti_ActivityMemset Struct Reference + + + + + +
+

CUpti_ActivityMemset Struct Reference
+ +[CUPTI Activity API] +

The activity record for memset. (deprecated). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint64_t end
uint16_t flags
CUpti_ActivityKind kind
uint16_t memoryKind
void * reserved0
uint64_t start
uint32_t streamId
uint32_t value
+


Detailed Description

+This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemset::bytes
+
+
+ +

+The number of bytes being set by the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset::contextId
+
+
+ +

+The ID of the context where the memory set is occurring. +

+

+ +

+ +
+ +

+The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset::deviceId
+
+
+ +

+The ID of the device where the memory set is occurring. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset::end
+
+
+ +

+The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset::flags
+
+
+ +

+The flags associated with the memset.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset::memoryKind
+
+
+ +

+The memory kind of the memory set

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset::start
+
+
+ +

+The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset::streamId
+
+
+ +

+The ID of the stream where the memory set is occurring. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset::value
+
+
+ +

+The value being assigned to memory by the memory set. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemset2.html b/doc/Cupti/structCUpti__ActivityMemset2.html new file mode 100644 index 0000000000000000000000000000000000000000..5f848e59f2ee610521d245433ef891790667781a --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemset2.html @@ -0,0 +1,264 @@ + + +Cupti: CUpti_ActivityMemset2 Struct Reference + + + + + +
+

CUpti_ActivityMemset2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memset. (deprecated in CUDA 11.1). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint64_t end
uint16_t flags
uint64_t graphNodeId
CUpti_ActivityKind kind
uint16_t memoryKind
void * reserved0
uint64_t start
uint32_t streamId
uint32_t value
+


Detailed Description

+This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemset2::bytes
+
+
+ +

+The number of bytes being set by the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset2::contextId
+
+
+ +

+The ID of the context where the memory set is occurring. +

+

+ +

+ +
+ +

+The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset2::deviceId
+
+
+ +

+The ID of the device where the memory set is occurring. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset2::end
+
+
+ +

+The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset2::flags
+
+
+ +

+The flags associated with the memset.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The unique ID of the graph node that executed this memset through graph launch. This field will be 0 if the memset is not executed through graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset2::memoryKind
+
+
+ +

+The memory kind of the memory set

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset2::start
+
+
+ +

+The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset2::streamId
+
+
+ +

+The ID of the stream where the memory set is occurring. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset2::value
+
+
+ +

+The value being assigned to memory by the memory set. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemset3.html b/doc/Cupti/structCUpti__ActivityMemset3.html new file mode 100644 index 0000000000000000000000000000000000000000..f31e5bc029df02d38a9df4d9dcff10eac5f18479 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemset3.html @@ -0,0 +1,298 @@ + + +Cupti: CUpti_ActivityMemset3 Struct Reference + + + + + +
+

CUpti_ActivityMemset3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memset. (deprecated in CUDA 11.6). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint64_t end
uint16_t flags
uint32_t graphId
uint64_t graphNodeId
CUpti_ActivityKind kind
uint16_t memoryKind
uint32_t padding
void * reserved0
uint64_t start
uint32_t streamId
uint32_t value
+


Detailed Description

+This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemset3::bytes
+
+
+ +

+The number of bytes being set by the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset3::contextId
+
+
+ +

+The ID of the context where the memory set is occurring. +

+

+ +

+ +
+ +

+The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset3::deviceId
+
+
+ +

+The ID of the device where the memory set is occurring. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset3::end
+
+
+ +

+The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset3::flags
+
+
+ +

+The flags associated with the memset.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset3::graphId
+
+
+ +

+The unique ID of the graph that executed this memset through graph launch. This field will be 0 if the memset is not executed through graph launch. +

+

+ +

+ +
+ +

+The unique ID of the graph node that executed this memset through graph launch. This field will be 0 if the memset is not executed through graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset3::memoryKind
+
+
+ +

+The memory kind of the memory set

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset3::padding
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset3::start
+
+
+ +

+The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset3::streamId
+
+
+ +

+The ID of the stream where the memory set is occurring. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset3::value
+
+
+ +

+The value being assigned to memory by the memory set. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMemset4.html b/doc/Cupti/structCUpti__ActivityMemset4.html new file mode 100644 index 0000000000000000000000000000000000000000..f09b062b1b2f859de46f8f9071dc8b3bf72cedf1 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMemset4.html @@ -0,0 +1,332 @@ + + +Cupti: CUpti_ActivityMemset4 Struct Reference + + + + + +
+

CUpti_ActivityMemset4 Struct Reference
+ +[CUPTI Activity API] +

The activity record for memset. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t channelID
CUpti_ChannelType channelType
uint32_t contextId
uint32_t correlationId
uint32_t deviceId
uint64_t end
uint16_t flags
uint32_t graphId
uint64_t graphNodeId
CUpti_ActivityKind kind
uint16_t memoryKind
uint32_t pad2
void * reserved0
uint64_t start
uint32_t streamId
uint32_t value
+


Detailed Description

+This activity record represents a memory set operation (CUPTI_ACTIVITY_KIND_MEMSET).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityMemset4::bytes
+
+
+ +

+The number of bytes being set by the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::channelID
+
+
+ +

+The ID of the HW channel on which the memory set is occurring. +

+

+ +

+
+ + + + +
CUpti_ChannelType CUpti_ActivityMemset4::channelType
+
+
+ +

+The type of the channel +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::contextId
+
+
+ +

+The ID of the context where the memory set is occurring. +

+

+ +

+ +
+ +

+The correlation ID of the memory set. Each memory set is assigned a unique correlation ID that is identical to the correlation ID in the driver API activity record that launched the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::deviceId
+
+
+ +

+The ID of the device where the memory set is occurring. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset4::end
+
+
+ +

+The end timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset4::flags
+
+
+ +

+The flags associated with the memset.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::graphId
+
+
+ +

+The unique ID of the graph that executed this memset through graph launch. This field will be 0 if the memset is not executed through graph launch. +

+

+ +

+ +
+ +

+The unique ID of the graph node that executed this memset through graph launch. This field will be 0 if the memset is not executed through graph launch. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MEMSET. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityMemset4::memoryKind
+
+
+ +

+The memory kind of the memory set

See also:
CUpti_ActivityMemoryKind
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::pad2
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityMemset4::start
+
+
+ +

+The start timestamp for the memory set, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the memory set. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::streamId
+
+
+ +

+The ID of the stream where the memory set is occurring. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityMemset4::value
+
+
+ +

+The value being assigned to memory by the memory set. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMetric.html b/doc/Cupti/structCUpti__ActivityMetric.html new file mode 100644 index 0000000000000000000000000000000000000000..187ecb4bd8d3da4be5bb206603affc67aae89777 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMetric.html @@ -0,0 +1,144 @@ + + +Cupti: CUpti_ActivityMetric Struct Reference + + + + + +
+

CUpti_ActivityMetric Struct Reference
+ +[CUPTI Activity API] +

The activity record for a CUPTI metric. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint8_t flags
CUpti_MetricID id
CUpti_ActivityKind kind
uint8_t pad [3]
CUpti_MetricValue value
+


Detailed Description

+This activity record represents the collection of a CUPTI metric value (CUPTI_ACTIVITY_KIND_METRIC). This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profile frameworks built on top of CUPTI that collect metric data may choose to use this type to store the collected metric data.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the metric. Use of this ID is user-defined, but typically this ID value will equal the correlation ID of the kernel for which the metric was gathered. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMetric::flags
+
+
+ +

+The properties of this metric.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The metric ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_METRIC. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMetric::pad[3]
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The metric value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityMetricInstance.html b/doc/Cupti/structCUpti__ActivityMetricInstance.html new file mode 100644 index 0000000000000000000000000000000000000000..1370e49d9913ce338fc988c28bfe9d8b89a61627 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityMetricInstance.html @@ -0,0 +1,161 @@ + + +Cupti: CUpti_ActivityMetricInstance Struct Reference + + + + + +
+

CUpti_ActivityMetricInstance Struct Reference
+ +[CUPTI Activity API] +

The activity record for a CUPTI metric with instance information. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint8_t flags
CUpti_MetricID id
uint32_t instance
CUpti_ActivityKind kind
uint8_t pad [7]
CUpti_MetricValue value
+


Detailed Description

+This activity record represents a CUPTI metric value for a specific metric domain instance (CUPTI_ACTIVITY_KIND_METRIC_INSTANCE). This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profile frameworks built on top of CUPTI that collect metric data may choose to use this type to store the collected metric data. This activity record should be used when metric domain instance information needs to be associated with the metric.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the metric. Use of this ID is user-defined, but typically this ID value will equal the correlation ID of the kernel for which the metric was gathered. +

+

+ +

+ +
+ +

+The properties of this metric.

See also:
CUpti_ActivityFlag
+ +
+

+ +

+ +
+ +

+The metric ID. +

+

+ +

+ +
+ +

+The metric domain instance. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_METRIC_INSTANCE. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityMetricInstance::pad[7]
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The metric value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityModule.html b/doc/Cupti/structCUpti__ActivityModule.html new file mode 100644 index 0000000000000000000000000000000000000000..1aa33594e5948e692a5de66a72c600e7003ed4c9 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityModule.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityModule Struct Reference + + + + + +
+

CUpti_ActivityModule Struct Reference
+ +[CUPTI Activity API] +

The activity record for a CUDA module. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint32_t contextId
const void * cubin
uint32_t cubinSize
uint32_t id
CUpti_ActivityKind kind
uint32_t pad
+


Detailed Description

+This activity record represents a CUDA module (CUPTI_ACTIVITY_KIND_MODULE). This activity record kind is not produced by the activity API but is included for completeness and ease-of-use. Profile frameworks built on top of CUPTI that collect module data from the module callback may choose to use this type to store the collected module data.

Field Documentation

+ +
+
+ + + + +
uint32_t CUpti_ActivityModule::contextId
+
+
+ +

+The ID of the context where the module is loaded. +

+

+ +

+
+ + + + +
const void* CUpti_ActivityModule::cubin
+
+
+ +

+The pointer to cubin. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityModule::cubinSize
+
+
+ +

+The cubin size. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityModule::id
+
+
+ +

+The module ID. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_MODULE. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityModule::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityName.html b/doc/Cupti/structCUpti__ActivityName.html new file mode 100644 index 0000000000000000000000000000000000000000..e19025774016dd69df00f7ba2f63d3fcfcb49d47 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityName.html @@ -0,0 +1,109 @@ + + +Cupti: CUpti_ActivityName Struct Reference + + + + + +
+

CUpti_ActivityName Struct Reference
+ +[CUPTI Activity API] +

The activity record providing a name. +More... +

+ + + + + + + + + + + +

Data Fields

CUpti_ActivityKind kind
const char * name
CUpti_ActivityObjectKindId objectId
CUpti_ActivityObjectKind objectKind
+


Detailed Description

+This activity record provides a name for a device, context, thread, etc. and other resource naming done via NVTX APIs (CUPTI_ACTIVITY_KIND_NAME).

Field Documentation

+ +
+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_NAME. +

+

+ +

+
+ + + + +
const char* CUpti_ActivityName::name
+
+
+ +

+The name. +

+

+ +

+ +
+ +

+The identifier for the activity object. 'objectKind' indicates which ID is valid for this record. +

+

+ +

+ +
+ +

+The kind of activity object being named. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityNvLink.html b/doc/Cupti/structCUpti__ActivityNvLink.html new file mode 100644 index 0000000000000000000000000000000000000000..b4797b9547068aa2f9a0787a0ed91e0c7fe44133 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityNvLink.html @@ -0,0 +1,267 @@ + + +Cupti: CUpti_ActivityNvLink Struct Reference + + + + + +
+

CUpti_ActivityNvLink Struct Reference
+ +[CUPTI Activity API] +

NVLink information. (deprecated in CUDA 9.0). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bandwidth
uint32_t flag
union {
idDev0
union {
idDev1
CUpti_ActivityKind kind
uint32_t nvlinkVersion
uint32_t physicalNvLinkCount
int8_t portDev0 [4]
int8_t portDev1 [4]
CUpti_DevType typeDev0
CUpti_DevType typeDev1
uint32_t domainId
uint32_t index
+


Detailed Description

+This structure gives capabilities of each logical NVLink connection between two devices, GPU<->GPU or GPU<->CPU, which can be used to understand the topology. NVLink information is now reported using the CUpti_ActivityNvLink2 activity record.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityNvLink::bandwidth
+
+
+ +

+Bandwidth of NVLink in kbytes/sec. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink::domainId
+
+
+ +

+Domain ID of NPU. On Linux, this can be queried using lspci. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink::flag
+
+
+ +

+Flag gives capabilities of the link

See also:
CUpti_LinkFlag
+ +
+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink::idDev0
+
+
+ +

+If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink::idDev1
+
+
+ +

+If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink::index
+
+
+ +

+Index of the NPU. First index will always be zero. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK. +

+

+ +

+ +
+ +

+NVLink version. +

+

+ +

+ +
+ +

+Number of physical NVLinks present between two devices. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink::portDev0[4]
+
+
+ +

+Port numbers for maximum 4 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink::portDev1[4]
+
+
+ +

+Port numbers for maximum 4 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+ +
+ +

+Type of device 0 CUpti_DevType +

+

+ +

+ +
+ +

+Type of device 1 CUpti_DevType +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityNvLink2.html b/doc/Cupti/structCUpti__ActivityNvLink2.html new file mode 100644 index 0000000000000000000000000000000000000000..31285af61163f9e2cdafaeea9caae41166d23681 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityNvLink2.html @@ -0,0 +1,267 @@ + + +Cupti: CUpti_ActivityNvLink2 Struct Reference + + + + + +
+

CUpti_ActivityNvLink2 Struct Reference
+ +[CUPTI Activity API] +

NVLink information. (deprecated in CUDA 10.0). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bandwidth
uint32_t flag
union {
idDev0
union {
idDev1
CUpti_ActivityKind kind
uint32_t nvlinkVersion
uint32_t physicalNvLinkCount
int8_t portDev0 [CUPTI_MAX_NVLINK_PORTS]
int8_t portDev1 [CUPTI_MAX_NVLINK_PORTS]
CUpti_DevType typeDev0
CUpti_DevType typeDev1
uint32_t domainId
uint32_t index
+


Detailed Description

+This structure gives capabilities of each logical NVLink connection between two devices, GPU<->GPU or GPU<->CPU, which can be used to understand the topology. NVLink information is now reported using the CUpti_ActivityNvLink4 activity record.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityNvLink2::bandwidth
+
+
+ +

+Bandwidth of NVLink in kbytes/sec. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink2::domainId
+
+
+ +

+Domain ID of NPU. On Linux, this can be queried using lspci. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink2::flag
+
+
+ +

+Flag gives capabilities of the link

See also:
CUpti_LinkFlag
+ +
+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink2::idDev0
+
+
+ +

+If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink2::idDev1
+
+
+ +

+If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink2::index
+
+
+ +

+Index of the NPU. First index will always be zero. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK. +

+

+ +

+ +
+ +

+NvLink version. +

+

+ +

+ +
+ +

+Number of physical NVLinks present between two devices. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink2::portDev0[CUPTI_MAX_NVLINK_PORTS]
+
+
+ +

+Port numbers for maximum 16 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink2::portDev1[CUPTI_MAX_NVLINK_PORTS]
+
+
+ +

+Port numbers for maximum 16 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+ +
+ +

+Type of device 0 CUpti_DevType +

+

+ +

+ +
+ +

+Type of device 1 CUpti_DevType +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityNvLink3.html b/doc/Cupti/structCUpti__ActivityNvLink3.html new file mode 100644 index 0000000000000000000000000000000000000000..8590f2c01c04b20747d4291cffc3f33b139d89e2 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityNvLink3.html @@ -0,0 +1,301 @@ + + +Cupti: CUpti_ActivityNvLink3 Struct Reference + + + + + +
+

CUpti_ActivityNvLink3 Struct Reference
+ +[CUPTI Activity API] +

NVLink information. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bandwidth
uint32_t flag
union {
idDev0
union {
idDev1
CUpti_ActivityKind kind
uint32_t nvlinkVersion
uint8_t nvswitchConnected
uint8_t pad [7]
uint32_t physicalNvLinkCount
int8_t portDev0 [CUPTI_MAX_NVLINK_PORTS]
int8_t portDev1 [CUPTI_MAX_NVLINK_PORTS]
CUpti_DevType typeDev0
CUpti_DevType typeDev1
uint32_t domainId
uint32_t index
+


Detailed Description

+This structure gives capabilities of each logical NVLink connection between two devices, GPU<->GPU or GPU<->CPU, which can be used to understand the topology. NVLink information is now reported using the CUpti_ActivityNvLink4 activity record.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityNvLink3::bandwidth
+
+
+ +

+Bandwidth of NVLink in kbytes/sec. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink3::domainId
+
+
+ +

+Domain ID of NPU. On Linux, this can be queried using lspci. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink3::flag
+
+
+ +

+Flag gives capabilities of the link

See also:
CUpti_LinkFlag
+ +
+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink3::idDev0
+
+
+ +

+If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink3::idDev1
+
+
+ +

+If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink3::index
+
+
+ +

+Index of the NPU. First index will always be zero. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK. +

+

+ +

+ +
+ +

+NvLink version. +

+

+ +

+ +
+ +

+NVSwitch is connected as an intermediate node. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityNvLink3::pad[7]
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Number of physical NVLinks present between two devices. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink3::portDev0[CUPTI_MAX_NVLINK_PORTS]
+
+
+ +

+Port numbers for maximum 16 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink3::portDev1[CUPTI_MAX_NVLINK_PORTS]
+
+
+ +

+Port numbers for maximum 16 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+ +
+ +

+Type of device 0 CUpti_DevType +

+

+ +

+ +
+ +

+Type of device 1 CUpti_DevType +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityNvLink4.html b/doc/Cupti/structCUpti__ActivityNvLink4.html new file mode 100644 index 0000000000000000000000000000000000000000..09874dd21eb29c556494b12c3aea7e391add3771 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityNvLink4.html @@ -0,0 +1,301 @@ + + +Cupti: CUpti_ActivityNvLink4 Struct Reference + + + + + +
+

CUpti_ActivityNvLink4 Struct Reference
+ +[CUPTI Activity API] +

NVLink information. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bandwidth
uint32_t flag
union {
idDev0
union {
idDev1
CUpti_ActivityKind kind
uint32_t nvlinkVersion
uint8_t nvswitchConnected
uint8_t pad [7]
uint32_t physicalNvLinkCount
int8_t portDev0 [CUPTI_MAX_NVLINK_PORTS]
int8_t portDev1 [CUPTI_MAX_NVLINK_PORTS]
CUpti_DevType typeDev0
CUpti_DevType typeDev1
uint32_t domainId
uint32_t index
+


Detailed Description

+This structure gives capabilities of each logical NVLink connection between two devices, gpu<->gpu or gpu<->CPU which can be used to understand the topology.

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityNvLink4::bandwidth
+
+
+ +

+Bandwidth of NVLink in kbytes/sec. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink4::domainId
+
+
+ +

+Domain ID of NPU. On Linux, this can be queried using lspci. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink4::flag
+
+
+ +

+Flag gives capabilities of the link

See also:
CUpti_LinkFlag
+ +
+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink4::idDev0
+
+
+ +

+If typeDev0 is CUPTI_DEV_TYPE_GPU, UUID for device 0. CUpti_ActivityDevice4. If typeDev0 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityNvLink4::idDev1
+
+
+ +

+If typeDev1 is CUPTI_DEV_TYPE_GPU, UUID for device 1. CUpti_ActivityDevice4. If typeDev1 is CUPTI_DEV_TYPE_NPU, struct npu for NPU. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityNvLink4::index
+
+
+ +

+Index of the NPU. First index will always be zero. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_NVLINK. +

+

+ +

+ +
+ +

+NvLink version. +

+

+ +

+ +
+ +

+NVSwitch is connected as an intermediate node. +

+

+ +

+
+ + + + +
uint8_t CUpti_ActivityNvLink4::pad[7]
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+Number of physical NVLinks present between two devices. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink4::portDev0[CUPTI_MAX_NVLINK_PORTS]
+
+
+ +

+Port numbers for maximum 32 NVLinks connected to device 0. If typeDev0 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+
+ + + + +
int8_t CUpti_ActivityNvLink4::portDev1[CUPTI_MAX_NVLINK_PORTS]
+
+
+ +

+Port numbers for maximum 32 NVLinks connected to device 1. If typeDev1 is CUPTI_DEV_TYPE_NPU, ignore this field. In case of invalid/unknown port number, this field will be set to value CUPTI_NVLINK_INVALID_PORT. This will be used to correlate the metric values to individual physical link and attribute traffic to the logical NVLink in the topology. +

+

+ +

+ +
+ +

+Type of device 0 CUpti_DevType +

+

+ +

+ +
+ +

+Type of device 1 CUpti_DevType +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityOpenAcc.html b/doc/Cupti/structCUpti__ActivityOpenAcc.html new file mode 100644 index 0000000000000000000000000000000000000000..0b618cb4cf2494045f85b3a5cd15ce1cedb26b88 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityOpenAcc.html @@ -0,0 +1,250 @@ + + +Cupti: CUpti_ActivityOpenAcc Struct Reference + + + + + +
+

CUpti_ActivityOpenAcc Struct Reference
+ +[CUPTI Activity API] +

The base activity record for OpenAcc records. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t cuContextId
uint32_t cuDeviceId
uint32_t cuProcessId
uint32_t cuStreamId
uint32_t cuThreadId
uint64_t end
CUpti_OpenAccEventKind eventKind
uint32_t externalId
CUpti_ActivityKind kind
CUpti_OpenAccConstructKind parentConstruct
uint64_t start
uint32_t threadId
+


Detailed Description

+The OpenACC activity API part uses a CUpti_ActivityOpenAcc as a generic representation for any OpenACC activity. The 'kind' field is used to determine the specific activity kind, and from that the CUpti_ActivityOpenAcc object can be cast to the specific OpenACC activity record type appropriate for that kind.

+Note that all OpenACC activity record types are padded and aligned to ensure that each member of the record is naturally aligned.

+

See also:
CUpti_ActivityKind
+

Field Documentation

+ +
+ +
+ +

+CUDA context ID. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAcc::cuDeviceId
+
+
+ +

+CUDA device ID. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the process where the OpenACC activity is executing. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAcc::cuStreamId
+
+
+ +

+CUDA stream ID. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAcc::cuThreadId
+
+
+ +

+The ID of the thread where the OpenACC activity is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAcc::end
+
+
+ +

+CUPTI end timestamp +

+

+ +

+ +
+ +

+CUPTI OpenACC event kind (

See also:
CUpti_OpenAccEventKind)
+ +
+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAcc::externalId
+
+
+ +

+The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. +

+

+ +

+ +
+ +

+The kind of this activity. +

+

+ +

+ +
+ +

+CUPTI OpenACC parent construct kind (

See also:
CUpti_OpenAccConstructKind)
+Note that for applications using PGI OpenACC runtime < 16.1, this will always be CUPTI_OPENACC_CONSTRUCT_KIND_UNKNOWN. +
+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAcc::start
+
+
+ +

+CUPTI start timestamp +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAcc::threadId
+
+
+ +

+ThreadId +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityOpenAccData.html b/doc/Cupti/structCUpti__ActivityOpenAccData.html new file mode 100644 index 0000000000000000000000000000000000000000..4f4d585f2389026b865d0537a618456b663c5a9a --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityOpenAccData.html @@ -0,0 +1,297 @@ + + +Cupti: CUpti_ActivityOpenAccData Struct Reference + + + + + +
+

CUpti_ActivityOpenAccData Struct Reference
+ +[CUPTI Activity API] +

The activity record for OpenACC data. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t bytes
uint32_t cuContextId
uint32_t cuDeviceId
uint32_t cuProcessId
uint32_t cuStreamId
uint32_t cuThreadId
uint64_t devicePtr
uint64_t end
CUpti_OpenAccEventKind eventKind
uint32_t externalId
uint64_t hostPtr
CUpti_ActivityKind kind
uint32_t pad1
uint64_t start
uint32_t threadId
+


Detailed Description

+(CUPTI_ACTIVITY_KIND_OPENACC_DATA).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityOpenAccData::bytes
+
+
+ +

+Number of bytes +

+

+ +

+ +
+ +

+CUDA context ID. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+CUDA device ID. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the process where the OpenACC activity is executing. +

+

+ +

+ +
+ +

+CUDA stream ID. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the thread where the OpenACC activity is executing. +

+

+ +

+ +
+ +

+Device pointer if available +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAccData::end
+
+
+ +

+CUPTI end timestamp +

+

+ +

+ +
+ +

+CUPTI OpenACC event kind (

See also:
CUpti_OpenAccEventKind)
+ +
+

+ +

+ +
+ +

+The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. +

+

+ +

+ +
+ +

+Host pointer if available +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_OPENACC_DATA. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAccData::pad1
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAccData::start
+
+
+ +

+CUPTI start timestamp +

+

+ +

+ +
+ +

+ThreadId +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityOpenAccLaunch.html b/doc/Cupti/structCUpti__ActivityOpenAccLaunch.html new file mode 100644 index 0000000000000000000000000000000000000000..aed90d1feb8961bdc29e73bfb58b5ad1f29ae850 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityOpenAccLaunch.html @@ -0,0 +1,297 @@ + + +Cupti: CUpti_ActivityOpenAccLaunch Struct Reference + + + + + +
+

CUpti_ActivityOpenAccLaunch Struct Reference
+ +[CUPTI Activity API] +

The activity record for OpenACC launch. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t cuContextId
uint32_t cuDeviceId
uint32_t cuProcessId
uint32_t cuStreamId
uint32_t cuThreadId
uint64_t end
CUpti_OpenAccEventKind eventKind
uint32_t externalId
CUpti_ActivityKind kind
uint64_t numGangs
uint64_t numWorkers
uint32_t pad1
uint64_t start
uint32_t threadId
uint64_t vectorLength
+


Detailed Description

+(CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH).

Field Documentation

+ +
+ +
+ +

+CUDA context id. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+CUDA device id. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the process where the OpenACC activity is executing. +

+

+ +

+ +
+ +

+CUDA stream id. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the thread where the OpenACC activity is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAccLaunch::end
+
+
+ +

+CUPTI end timestamp +

+

+ +

+ +
+ +

+CUPTI OpenACC event kind (

See also:
CUpti_OpenAccEventKind)
+ +
+

+ +

+ +
+ +

+The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH. +

+

+ +

+ +
+ +

+The number of gangs created for this kernel launch +

+

+ +

+ +
+ +

+The number of workers created for this kernel launch +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenAccLaunch::pad1
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+CUPTI start timestamp +

+

+ +

+ +
+ +

+ThreadId +

+

+ +

+ +
+ +

+The number of vector lanes created for this kernel launch +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityOpenAccOther.html b/doc/Cupti/structCUpti__ActivityOpenAccOther.html new file mode 100644 index 0000000000000000000000000000000000000000..b4d9dec8e401a854e28d287183aeac5bb2bf65a1 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityOpenAccOther.html @@ -0,0 +1,229 @@ + + +Cupti: CUpti_ActivityOpenAccOther Struct Reference + + + + + +
+

CUpti_ActivityOpenAccOther Struct Reference
+ +[CUPTI Activity API] +

The activity record for OpenACC other. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t cuContextId
uint32_t cuDeviceId
uint32_t cuProcessId
uint32_t cuStreamId
uint32_t cuThreadId
uint64_t end
CUpti_OpenAccEventKind eventKind
uint32_t externalId
CUpti_ActivityKind kind
uint64_t start
uint32_t threadId
+


Detailed Description

+(CUPTI_ACTIVITY_KIND_OPENACC_OTHER).

Field Documentation

+ +
+ +
+ +

+CUDA context id. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+CUDA device id. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the process where the OpenACC activity is executing. +

+

+ +

+ +
+ +

+CUDA stream id. Valid only if deviceType is acc_device_nvidia. +

+

+ +

+ +
+ +

+The ID of the thread where the OpenACC activity is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAccOther::end
+
+
+ +

+CUPTI end timestamp +

+

+ +

+ +
+ +

+CUPTI OpenACC event kind (

See also:
CUpti_OpenAccEventKind)
+ +
+

+ +

+ +
+ +

+The OpenACC correlation ID. Valid only if deviceType is acc_device_nvidia. If not 0, it uniquely identifies this record. It is identical to the externalId in the preceding external correlation record of type CUPTI_EXTERNAL_CORRELATION_KIND_OPENACC. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_OPENACC_OTHER. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenAccOther::start
+
+
+ +

+CUPTI start timestamp +

+

+ +

+ +
+ +

+ThreadId +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityOpenMp.html b/doc/Cupti/structCUpti__ActivityOpenMp.html new file mode 100644 index 0000000000000000000000000000000000000000..dfc0c07e78e90374a8c8907de46f44d195efe3cd --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityOpenMp.html @@ -0,0 +1,162 @@ + + +Cupti: CUpti_ActivityOpenMp Struct Reference + + + + + +
+

CUpti_ActivityOpenMp Struct Reference
+ +[CUPTI Activity API] +

The base activity record for OpenMp records. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

uint32_t cuProcessId
uint32_t cuThreadId
uint64_t end
CUpti_OpenMpEventKind eventKind
CUpti_ActivityKind kind
uint64_t start
uint32_t threadId
+


Detailed Description

+
See also:
CUpti_ActivityKind
+

Field Documentation

+ +
+
+ + + + +
uint32_t CUpti_ActivityOpenMp::cuProcessId
+
+
+ +

+The ID of the process where the OpenMP activity is executing. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenMp::cuThreadId
+
+
+ +

+The ID of the thread where the OpenMP activity is executing. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenMp::end
+
+
+ +

+CUPTI end timestamp +

+

+ +

+
+ + + + +
CUpti_OpenMpEventKind CUpti_ActivityOpenMp::eventKind
+
+
+ +

+CUPTI OpenMP event kind (

See also:
CUpti_OpenMpEventKind)
+ +
+

+ +

+ +
+ +

+The kind of this activity. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOpenMp::start
+
+
+ +

+CUPTI start timestamp +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityOpenMp::threadId
+
+
+ +

+ThreadId +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityOverhead.html b/doc/Cupti/structCUpti__ActivityOverhead.html new file mode 100644 index 0000000000000000000000000000000000000000..d24b7666c45623c111da7de1b144401e1da3612f --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityOverhead.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityOverhead Struct Reference + + + + + +
+

CUpti_ActivityOverhead Struct Reference
+ +[CUPTI Activity API] +

The activity record for CUPTI and driver overheads. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint64_t end
CUpti_ActivityKind kind
CUpti_ActivityObjectKindId objectId
CUpti_ActivityObjectKind objectKind
CUpti_ActivityOverheadKind overheadKind
uint64_t start
+


Detailed Description

+This activity record provides CUPTI and driver overhead information (CUPTI_ACTIVITY_KIND_OVERHEAD).

Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_ActivityOverhead::end
+
+
+ +

+The end timestamp for the overhead, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the overhead. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_OVERHEAD. +

+

+ +

+ +
+ +

+The identifier for the activity object. 'objectKind' indicates which ID is valid for this record. +

+

+ +

+ +
+ +

+The kind of activity object that the overhead is associated with. +

+

+ +

+ +
+ +

+The kind of overhead, CUPTI, DRIVER, COMPILER etc. +

+

+ +

+
+ + + + +
uint64_t CUpti_ActivityOverhead::start
+
+
+ +

+The start timestamp for the overhead, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the overhead. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPCSampling.html b/doc/Cupti/structCUpti__ActivityPCSampling.html new file mode 100644 index 0000000000000000000000000000000000000000..b4b7cf0bcc316ab8c5d88d3bbb64f282c12beb3e --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPCSampling.html @@ -0,0 +1,177 @@ + + +Cupti: CUpti_ActivityPCSampling Struct Reference + + + + + +
+

CUpti_ActivityPCSampling Struct Reference
+ +[CUPTI Activity API] +

The activity record for PC sampling. (deprecated in CUDA 8.0). +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint32_t pcOffset
uint32_t samples
uint32_t sourceLocatorId
CUpti_ActivityPCSamplingStallReason stallReason
+


Detailed Description

+This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING). PC sampling activities are now reported using the CUpti_ActivityPCSampling2 activity record.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The properties of this instruction. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING. +

+

+ +

+ +
+ +

+The pc offset for the instruction. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityPCSampling::samples
+
+
+ +

+Number of times the PC was sampled with the stallReason in the record. The same PC can be sampled with different stall reasons. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+Current stall reason. Includes one of the reasons from CUpti_ActivityPCSamplingStallReason +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPCSampling2.html b/doc/Cupti/structCUpti__ActivityPCSampling2.html new file mode 100644 index 0000000000000000000000000000000000000000..8ea63316c022b107248141ad2f90365f0a0bc7ad --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPCSampling2.html @@ -0,0 +1,194 @@ + + +Cupti: CUpti_ActivityPCSampling2 Struct Reference + + + + + +
+

CUpti_ActivityPCSampling2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for PC sampling. (deprecated in CUDA 9.0). +More... +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint32_t latencySamples
uint32_t pcOffset
uint32_t samples
uint32_t sourceLocatorId
CUpti_ActivityPCSamplingStallReason stallReason
+


Detailed Description

+This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING). PC sampling activities are now reported using the CUpti_ActivityPCSampling3 activity record.

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The properties of this instruction. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING. +

+

+ +

+ +
+ +

+Number of times the PC was sampled with the stallReason in the record. These samples indicate that no instruction was issued in that cycle from the warp scheduler from where the warp was sampled. Field is valid for devices with compute capability 6.0 and higher +

+

+ +

+ +
+ +

+The pc offset for the instruction. +

+

+ +

+ +
+ +

+Number of times the PC was sampled with the stallReason in the record. The same PC can be sampled with different stall reasons. The count includes latencySamples. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+Current stall reason. Includes one of the reasons from CUpti_ActivityPCSamplingStallReason +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPCSampling3.html b/doc/Cupti/structCUpti__ActivityPCSampling3.html new file mode 100644 index 0000000000000000000000000000000000000000..5df337213f24158f80e3e2cf68a532bfaa1c1d4d --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPCSampling3.html @@ -0,0 +1,194 @@ + + +Cupti: CUpti_ActivityPCSampling3 Struct Reference + + + + + +
+

CUpti_ActivityPCSampling3 Struct Reference
+ +[CUPTI Activity API] +

The activity record for PC sampling. +More... +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint32_t latencySamples
uint64_t pcOffset
uint32_t samples
uint32_t sourceLocatorId
CUpti_ActivityPCSamplingStallReason stallReason
+


Detailed Description

+This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING).

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The properties of this instruction. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING. +

+

+ +

+ +
+ +

+Number of times the PC was sampled with the stallReason in the record. These samples indicate that no instruction was issued in that cycle from the warp scheduler from where the warp was sampled. Field is valid for devices with compute capability 6.0 and higher +

+

+ +

+ +
+ +

+The pc offset for the instruction. +

+

+ +

+ +
+ +

+Number of times the PC was sampled with the stallReason in the record. The same PC can be sampled with different stall reasons. The count includes latencySamples. +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+Current stall reason. Includes one of the reasons from CUpti_ActivityPCSamplingStallReason +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPCSamplingConfig.html b/doc/Cupti/structCUpti__ActivityPCSamplingConfig.html new file mode 100644 index 0000000000000000000000000000000000000000..91fce484059e4dee8b13638c94da2e5beedeb01b --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPCSamplingConfig.html @@ -0,0 +1,93 @@ + + +Cupti: CUpti_ActivityPCSamplingConfig Struct Reference + + + + + +
+

CUpti_ActivityPCSamplingConfig Struct Reference
+ +[CUPTI Activity API] +

PC sampling configuration structure. +More... +

+ + + + + + + + + +

Data Fields

CUpti_ActivityPCSamplingPeriod samplingPeriod
uint32_t samplingPeriod2
uint32_t size
+


Detailed Description

+This structure defines the pc sampling configuration.

+See function cuptiActivityConfigurePCSampling


Field Documentation

+ +
+ +
+ +

+There are 5 levels provided for the sampling period. Each level internally maps to a period in terms of cycles. The same level can map to a different number of cycles on different GPUs. The number of cycles will be chosen to minimize information loss. The period chosen will be given by samplingPeriodInCycles in CUpti_ActivityPCSamplingRecordInfo for each kernel instance. +

+

+ +

+ +
+ +

+This will override the period set by samplingPeriod. A value of 0 in samplingPeriod2 means that samplingPeriod2 should not be used and samplingPeriod should be used instead. Valid values for samplingPeriod2 are between 5 and 31, both inclusive. This will set the sampling period to (2^samplingPeriod2) cycles. +

+

+ +

+ +
+ +

+Size of configuration structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPCSamplingRecordInfo.html b/doc/Cupti/structCUpti__ActivityPCSamplingRecordInfo.html new file mode 100644 index 0000000000000000000000000000000000000000..3077961886f8faffb077e6b26eaa72032ac65ff3 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPCSamplingRecordInfo.html @@ -0,0 +1,126 @@ + + +Cupti: CUpti_ActivityPCSamplingRecordInfo Struct Reference + + + + + +
+

CUpti_ActivityPCSamplingRecordInfo Struct Reference
+ +[CUPTI Activity API] +

The activity record for record status for PC sampling. +More... +

+ + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint64_t droppedSamples
CUpti_ActivityKind kind
uint64_t samplingPeriodInCycles
uint64_t totalSamples
+


Detailed Description

+This activity records information obtained by sampling PC (CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO).

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+Number of samples that were dropped by hardware due to backpressure/overflow. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO. +

+

+ +

+ +
+ +

+Sampling period in terms of number of cycles. +

+

+ +

+ +
+ +

+Number of times the PC was sampled for this kernel instance including all dropped samples. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPcie.html b/doc/Cupti/structCUpti__ActivityPcie.html new file mode 100644 index 0000000000000000000000000000000000000000..57532dfe5a9a50a4886cd68cc7635a2cf2af34c3 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPcie.html @@ -0,0 +1,334 @@ + + +Cupti: CUpti_ActivityPcie Struct Reference + + + + + +
+

CUpti_ActivityPcie Struct Reference
+ +[CUPTI Activity API] +

PCI devices information required to construct topology. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

union {
attr
uint32_t domain
union {
   uint32_t   bridgeId
   CUdevice   devId
id
CUpti_ActivityKind kind
uint16_t linkRate
uint16_t linkWidth
uint16_t pcieGeneration
CUpti_PcieDeviceType type
uint16_t upstreamBus
uint16_t deviceId
uint16_t pad0
CUdevice peerDev [CUPTI_MAX_GPUS]
uint16_t secondaryBus
CUuuid uuidDev
uint16_t vendorId
+


Detailed Description

+This structure gives capabilities of GPU and PCI bridge connected to the PCIE bus which can be used to understand the topology.

Field Documentation

+ +
+
+ + + + +
union { ... } CUpti_ActivityPcie::attr
+
+
+ +

+Attributes for more information about GPU (gpuAttr) or PCI Bridge (bridgeAttr) +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityPcie::bridgeId
+
+
+ +

+A unique identifier for Bridge in the Topology +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::deviceId
+
+
+ +

+Device ID of the bridge +

+

+ +

+
+ + + + +
CUdevice CUpti_ActivityPcie::devId
+
+
+ +

+GPU device ID +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityPcie::domain
+
+
+ +

+Domain for the GPU or Bridge, required to identify which PCIE bus it belongs to in multiple NUMA systems. +

+

+ +

+
+ + + + +
union { ... } CUpti_ActivityPcie::id
+
+
+ +

+A unique identifier for GPU or Bridge in Topology +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_PCIE. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::linkRate
+
+
+ +

+Link rate of the GPU or bridge in gigatransfers per second (GT/s) +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::linkWidth
+
+
+ +

+Link width of the GPU or bridge +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::pad0
+
+
+ +

+Padding for alignment +

+

+ +

+ +
+ +

+PCIE Generation of GPU or Bridge. +

+

+ +

+
+ + + + +
CUdevice CUpti_ActivityPcie::peerDev[CUPTI_MAX_GPUS]
+
+
+ +

+CUdevice with which this device has P2P capability. This can also be obtained by querying cuDeviceCanAccessPeer or cudaDeviceCanAccessPeer APIs +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::secondaryBus
+
+
+ +

+The downstream bus number, used to search downstream devices/bridges connected to this bridge. +

+

+ +

+ +
+ +

+Type of device in topology, CUpti_PcieDeviceType. If type is CUPTI_PCIE_DEVICE_TYPE_GPU use devId for id and gpuAttr and if type is CUPTI_PCIE_DEVICE_TYPE_BRIDGE use bridgeId for id and bridgeAttr. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::upstreamBus
+
+
+ +

+Upstream bus ID for the GPU or PCI bridge. Required to identify which bus it is connected to in the topology. +

+

+ +

+
+ + + + +
CUuuid CUpti_ActivityPcie::uuidDev
+
+
+ +

+UUID for the device. CUpti_ActivityDevice4. +

+

+ +

+
+ + + + +
uint16_t CUpti_ActivityPcie::vendorId
+
+
+ +

+Vendor ID of the bridge +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityPreemption.html b/doc/Cupti/structCUpti__ActivityPreemption.html new file mode 100644 index 0000000000000000000000000000000000000000..2d1d658fad253275631c5488cc96f8473bf0f4a7 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityPreemption.html @@ -0,0 +1,177 @@ + + +Cupti: CUpti_ActivityPreemption Struct Reference + + + + + +
+

CUpti_ActivityPreemption Struct Reference
+ +[CUPTI Activity API] +

The activity record for a preemption of a CDP kernel. +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t blockX
uint32_t blockY
uint32_t blockZ
int64_t gridId
CUpti_ActivityKind kind
uint32_t pad
CUpti_ActivityPreemptionKind preemptionKind
uint64_t timestamp
+


Detailed Description

+This activity record represents a preemption of a CDP kernel.

Field Documentation

+ +
+
+ + + + +
uint32_t CUpti_ActivityPreemption::blockX
+
+
+ +

+The X-dimension of the block that is preempted +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityPreemption::blockY
+
+
+ +

+The Y-dimension of the block that is preempted +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityPreemption::blockZ
+
+
+ +

+The Z-dimension of the block that is preempted +

+

+ +

+ +
+ +

+The grid-id of the block that is preempted +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_PREEMPTION +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityPreemption::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+kind of the preemption +

+

+ +

+ +
+ +

+The timestamp of the preemption, in ns. A value of 0 indicates that timestamp information could not be collected for the preemption. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivitySharedAccess.html b/doc/Cupti/structCUpti__ActivitySharedAccess.html new file mode 100644 index 0000000000000000000000000000000000000000..07658b56753689d646aaf749f6c2935bd1081400 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivitySharedAccess.html @@ -0,0 +1,228 @@ + + +Cupti: CUpti_ActivitySharedAccess Struct Reference + + + + + +
+

CUpti_ActivitySharedAccess Struct Reference
+ +[CUPTI Activity API] +

The activity record for source-level shared access. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t correlationId
uint32_t executed
CUpti_ActivityFlag flags
uint32_t functionId
CUpti_ActivityKind kind
uint32_t pad
uint32_t pcOffset
uint64_t sharedTransactions
uint32_t sourceLocatorId
uint64_t theoreticalSharedTransactions
uint64_t threadsExecuted
+


Detailed Description

+This activity records the locations of the shared accesses in the source (CUPTI_ACTIVITY_KIND_SHARED_ACCESS).

Field Documentation

+ +
+ +
+ +

+The correlation ID of the kernel to which this result is associated. +

+

+ +

+ +
+ +

+The number of times this instruction was executed per warp. It will be incremented when at least one thread in the warp is active with predicate and condition code evaluating to true. +

+

+ +

+ +
+ +

+The properties of this shared access. +

+

+ +

+ +
+ +

+Correlation ID with global/device function name +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_SHARED_ACCESS. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivitySharedAccess::pad
+
+
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The pc offset for the access. +

+

+ +

+ +
+ +

+The total number of shared memory transactions generated by this access +

+

+ +

+ +
+ +

+The ID for source locator. +

+

+ +

+ +
+ +

+The minimum number of shared memory transactions possible based on the access pattern. +

+

+ +

+ +
+ +

+Each time this instruction is executed, this is incremented by the number of threads that executed the instruction with predicate and condition code evaluating to true. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivitySourceLocator.html b/doc/Cupti/structCUpti__ActivitySourceLocator.html new file mode 100644 index 0000000000000000000000000000000000000000..8656ee4371fb10507c984734a64766e777665a69 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivitySourceLocator.html @@ -0,0 +1,109 @@ + + +Cupti: CUpti_ActivitySourceLocator Struct Reference + + + + + +
+

CUpti_ActivitySourceLocator Struct Reference
+ +[CUPTI Activity API] +

The activity record for source locator. +More... +

+ + + + + + + + + + + +

Data Fields

const char * fileName
uint32_t id
CUpti_ActivityKind kind
uint32_t lineNumber
+


Detailed Description

+This activity record represents a source locator (CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR).

Field Documentation

+ +
+
+ + + + +
const char* CUpti_ActivitySourceLocator::fileName
+
+
+ +

+The path for the file. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivitySourceLocator::id
+
+
+ +

+The ID for the source path, will be used in all the source level results. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR. +

+

+ +

+ +
+ +

+The line number in the source. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityStream.html b/doc/Cupti/structCUpti__ActivityStream.html new file mode 100644 index 0000000000000000000000000000000000000000..fdd119ce46d5559710bb96152fbafa73cd1f31d1 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityStream.html @@ -0,0 +1,143 @@ + + +Cupti: CUpti_ActivityStream Struct Reference + + + + + +
+

CUpti_ActivityStream Struct Reference
+ +[CUPTI Activity API] +

The activity record for CUDA stream. +More... +

+ + + + + + + + + + + + + + + +

Data Fields

uint32_t contextId
uint32_t correlationId
CUpti_ActivityStreamFlag flag
CUpti_ActivityKind kind
uint32_t priority
uint32_t streamId
+


Detailed Description

+This activity is used to track created streams. (CUPTI_ACTIVITY_KIND_STREAM).

Field Documentation

+ +
+
+ + + + +
uint32_t CUpti_ActivityStream::contextId
+
+
+ +

+The ID of the context where the stream was created. +

+

+ +

+ +
+ +

+The correlation ID of the API to which this result is associated. +

+

+ +

+ +
+ +

+Flags associated with the stream. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_STREAM. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityStream::priority
+
+
+ +

+The clamped priority for the stream. +

+

+ +

+
+ + + + +
uint32_t CUpti_ActivityStream::streamId
+
+
+ +

+A unique stream ID to identify the stream. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivitySynchronization.html b/doc/Cupti/structCUpti__ActivitySynchronization.html new file mode 100644 index 0000000000000000000000000000000000000000..3f3705bbb03de1fa815ae5e41047af30e243b64d --- /dev/null +++ b/doc/Cupti/structCUpti__ActivitySynchronization.html @@ -0,0 +1,177 @@ + + +Cupti: CUpti_ActivitySynchronization Struct Reference + + + + + +
+

CUpti_ActivitySynchronization Struct Reference
+ +[CUPTI Activity API] +

The activity record for synchronization management. +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

uint32_t contextId
uint32_t correlationId
uint32_t cudaEventId
uint64_t end
CUpti_ActivityKind kind
uint64_t start
uint32_t streamId
CUpti_ActivitySynchronizationType type
+


Detailed Description

+This activity is used to track various CUDA synchronization APIs. (CUPTI_ACTIVITY_KIND_SYNCHRONIZATION).

Field Documentation

+ +
+ +
+ +

+The ID of the context for which the synchronization API is called. In case of context synchronization API it is the context id for which the API is called. In case of stream/event synchronization it is the ID of the context where the stream/event was created. +

+

+ +

+ +
+ +

+The correlation ID of the API to which this result is associated. +

+

+ +

+ +
+ +

+The event ID for which the synchronization API is called. A CUPTI_SYNCHRONIZATION_INVALID_VALUE value indicates that the field is not applicable for this record. Not valid for cuCtxSynchronize, cuStreamSynchronize. +

+

+ +

+ +
+ +

+The end timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the function. +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_SYNCHRONIZATION. +

+

+ +

+ +
+ +

+The start timestamp for the function, in ns. A value of 0 for both the start and end timestamps indicates that timestamp information could not be collected for the function. +

+

+ +

+ +
+ +

+The compute stream for which the synchronization API is called. A CUPTI_SYNCHRONIZATION_INVALID_VALUE value indicates that the field is not applicable for this record. Not valid for cuCtxSynchronize, cuEventSynchronize. +

+

+ +

+ +
+ +

+The type of record. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounter.html b/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounter.html new file mode 100644 index 0000000000000000000000000000000000000000..4135140ad2cdfbd9a63b75eac0303b462503358c --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounter.html @@ -0,0 +1,177 @@ + + +Cupti: CUpti_ActivityUnifiedMemoryCounter Struct Reference + + + + + +
+

CUpti_ActivityUnifiedMemoryCounter Struct Reference
+ +[CUPTI Activity API] +

The activity record for Unified Memory counters (deprecated in CUDA 7.0). +More... +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

CUpti_ActivityUnifiedMemoryCounterKind counterKind
uint32_t deviceId
CUpti_ActivityKind kind
uint32_t pad
uint32_t processId
CUpti_ActivityUnifiedMemoryCounterScope scope
uint64_t timestamp
uint64_t value
+


Detailed Description

+This activity record represents a Unified Memory counter (CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER).

Field Documentation

+ +

+ +

+ +
+ +

+The ID of the device involved in the memory transfer operation. It is not relevant if the scope of the counter is global (all devices). +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the process to which this record belongs. In case of global scope, processId is undefined. +

+

+ +

+ +

+ +
+ +

+The timestamp when this sample was retrieved, in ns. A value of 0 indicates that timestamp information could not be collected +

+

+ +

+ +
+ +

+Value of the counter +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounter2.html b/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounter2.html new file mode 100644 index 0000000000000000000000000000000000000000..b5383e38aa823b2e0bb8c49312583dea342aa112 --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounter2.html @@ -0,0 +1,245 @@ + + +Cupti: CUpti_ActivityUnifiedMemoryCounter2 Struct Reference + + + + + +
+

CUpti_ActivityUnifiedMemoryCounter2 Struct Reference
+ +[CUPTI Activity API] +

The activity record for Unified Memory counters (CUDA 7.0 and beyond). +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t address
CUpti_ActivityUnifiedMemoryCounterKind counterKind
uint32_t dstId
uint64_t end
uint32_t flags
CUpti_ActivityKind kind
uint32_t pad
uint32_t processId
uint32_t srcId
uint64_t start
uint32_t streamId
uint64_t value
+


Detailed Description

+This activity record represents a Unified Memory counter (CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER).

Field Documentation

+ +
+ +
+ +

+This is the virtual base address of the page/s being transferred. For cpu and gpu faults, the virtual address for the page that faulted. +

+

+ +

+ +
+ +

+The Unified Memory counter kind +

+

+ +

+ +
+ +

+The ID of the destination CPU/device involved in the memory transfer or remote map operation. Ignore this field if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING +

+

+ +

+ +
+ +

+The end timestamp of the counter, in ns. Ignore this field if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH, timestamp is captured when activity finishes on GPU. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT, timestamp is captured when CUDA driver queues the replay of faulting memory accesses on the GPU. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING, timestamp is captured when throttling operation was finished by CUDA driver. +

+

+ +

+ +
+ +

+The flags associated with this record. See enums CUpti_ActivityUnifiedMemoryAccessType if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT and CUpti_ActivityUnifiedMemoryMigrationCause if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH and CUpti_ActivityUnifiedMemoryRemoteMapCause if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP and CUpti_ActivityFlag if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING or CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING +

+

+ +

+ +
+ +

+The activity record kind, must be CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER +

+

+ +

+ +
+ +

+Undefined. Reserved for internal use. +

+

+ +

+ +
+ +

+The ID of the process to which this record belongs. +

+

+ +

+ +
+ +

+The ID of the source CPU/device involved in the memory transfer, page fault, thrashing, throttling or remote map operation. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING, it is a bitwise ORing of the device IDs fighting for the memory region. Ignore this field if counterKind is CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT +

+

+ +

+ +
+ +

+The start timestamp of the counter, in ns. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH, timestamp is captured when activity starts on GPU. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT, timestamp is captured when CUDA driver started processing the fault. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING, timestamp is captured when CUDA driver detected thrashing of memory region. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THROTTLING, timestamp is captured when throttling operation was started by CUDA driver. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP, timestamp is captured when CUDA driver has pushed all required operations to the processor specified by dstId. +

+

+ +

+ +
+ +

+The ID of the stream causing the transfer. The value of this field is invalid. +

+

+ +

+ +
+ +

+Value of the counter. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD, CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH, CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_THRASHING and CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_REMOTE_MAP, it is the size of the memory region in bytes. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT, it is the number of page fault groups for the same page. For counterKind CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT, it is the program counter for the instruction that caused fault. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounterConfig.html b/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounterConfig.html new file mode 100644 index 0000000000000000000000000000000000000000..fd152bfb5802d25f98930b1dc122b0f8063c141e --- /dev/null +++ b/doc/Cupti/structCUpti__ActivityUnifiedMemoryCounterConfig.html @@ -0,0 +1,109 @@ + + +Cupti: CUpti_ActivityUnifiedMemoryCounterConfig Struct Reference + + + + + +
+

CUpti_ActivityUnifiedMemoryCounterConfig Struct Reference
+ +[CUPTI Activity API] +

Unified Memory counters configuration structure. +More... +

+ + + + + + + + + + + +

Data Fields

uint32_t deviceId
uint32_t enable
CUpti_ActivityUnifiedMemoryCounterKind kind
CUpti_ActivityUnifiedMemoryCounterScope scope
+


Detailed Description

+This structure controls the enable/disable of the various Unified Memory counters consisting of scope, kind and other parameters. See function cuptiActivityConfigureUnifiedMemoryCounter

Field Documentation

+ +
+ +
+ +

+Device id of the target device. This is relevant only for single device scopes. (deprecated in CUDA 7.0) +

+

+ +

+ +
+ +

+Control to enable/disable the counter. To enable the counter set it to non-zero value while disable is indicated by zero. +

+

+ +

+ +
+ +

+Unified Memory counter kind +

+

+ +

+ +
+ +

+Unified Memory counter scope. (deprecated in CUDA 7.0) +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__CallbackData.html b/doc/Cupti/structCUpti__CallbackData.html new file mode 100644 index 0000000000000000000000000000000000000000..fc468bd544ef621d1e28134419a25a0ac75ece2d --- /dev/null +++ b/doc/Cupti/structCUpti__CallbackData.html @@ -0,0 +1,194 @@ + + +Cupti: CUpti_CallbackData Struct Reference + + + + + +
+

CUpti_CallbackData Struct Reference
+ +[CUPTI Callback API] +

Data passed into a runtime or driver API callback function. +More... +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

CUpti_ApiCallbackSite callbackSite
CUcontext context
uint32_t contextUid
uint64_t * correlationData
uint32_t correlationId
const char * functionName
const void * functionParams
void * functionReturnValue
const char * symbolName
+


Detailed Description

+Data passed into a runtime or driver API callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API. The callback data is valid only within the invocation of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data. For example, if you make a shallow copy of CUpti_CallbackData within a callback, you cannot dereference functionParams outside of that callback to access the function parameters. functionName is an exception: the string pointed to by functionName is a global constant and so may be accessed outside of the callback.

Field Documentation

+ +
+ +
+ +

+Point in the runtime or driver function from where the callback was issued. +

+

+ +

+
+ + + + +
CUcontext CUpti_CallbackData::context
+
+
+ +

+Driver context current to the thread, or null if no context is current. This value can change from the entry to exit callback of a runtime API function if the runtime initializes a context. +

+

+ +

+
+ + + + +
uint32_t CUpti_CallbackData::contextUid
+
+
+ +

+Unique ID for the CUDA context associated with the thread. The UIDs are assigned sequentially as contexts are created and are unique within a process. +

+

+ +

+ +
+ +

+Pointer to data shared between the entry and exit callbacks of a given runtime or driver API function invocation. This field can be used to pass 64-bit values from the entry callback to the corresponding exit callback. +

+

+ +

+
+ + + + +
uint32_t CUpti_CallbackData::correlationId
+
+
+ +

+The activity record correlation ID for this callback. For a driver domain callback (i.e. domain CUPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID in the CUpti_ActivityAPI record corresponding to the CUDA driver function call. For a runtime domain callback (i.e. domain CUPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation ID in the CUpti_ActivityAPI record corresponding to the CUDA runtime function call. Within the callback, this ID can be recorded to correlate user data with the activity record. This field is new in 4.1. +

+

+ +

+
+ + + + +
const char* CUpti_CallbackData::functionName
+
+
+ +

+Name of the runtime or driver API function which issued the callback. This string is a global constant and so may be accessed outside of the callback. +

+

+ +

+
+ + + + +
const void* CUpti_CallbackData::functionParams
+
+
+ +

+Pointer to the arguments passed to the runtime or driver API call. See generated_cuda_runtime_api_meta.h and generated_cuda_meta.h for structure definitions for the parameters for each runtime and driver API function. +

+

+ +

+ +
+ +

+Pointer to the return value of the runtime or driver API call. This field is only valid within the exit::CUPTI_API_EXIT callback. For a runtime API functionReturnValue points to a cudaError_t. For a driver API functionReturnValue points to a CUresult. +

+

+ +

+
+ + + + +
const char* CUpti_CallbackData::symbolName
+
+
+ +

+Name of the symbol operated on by the runtime or driver API function which issued the callback. This entry is valid only for driver and runtime launch callbacks, where it returns the name of the kernel. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__EventGroupSet.html b/doc/Cupti/structCUpti__EventGroupSet.html new file mode 100644 index 0000000000000000000000000000000000000000..2283cdb822968942de97290b3555fa414e1923eb --- /dev/null +++ b/doc/Cupti/structCUpti__EventGroupSet.html @@ -0,0 +1,75 @@ + + +Cupti: CUpti_EventGroupSet Struct Reference + + + + + +
+

CUpti_EventGroupSet Struct Reference
+ +[CUPTI Event API] +

A set of event groups. +More... +

+ + + + + + + +

Data Fields

CUpti_EventGroupeventGroups
uint32_t numEventGroups
+


Detailed Description

+A set of event groups. When returned by cuptiEventGroupSetsCreate and cuptiMetricCreateEventGroupSets a set indicates the event groups that can be enabled at the same time (i.e. all the events in the set can be collected simultaneously).

Field Documentation

+ +
+ +
+ +

+An array of numEventGroups event groups. +

+

+ +

+ +
+ +

+The number of event groups in the set. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__EventGroupSets.html b/doc/Cupti/structCUpti__EventGroupSets.html new file mode 100644 index 0000000000000000000000000000000000000000..851d38b0db4b30fc0959274b1ffe0b1dc52115df --- /dev/null +++ b/doc/Cupti/structCUpti__EventGroupSets.html @@ -0,0 +1,75 @@ + + +Cupti: CUpti_EventGroupSets Struct Reference + + + + + +
+

CUpti_EventGroupSets Struct Reference
+ +[CUPTI Event API] +

A set of event group sets. +More... +

+ + + + + + + +

Data Fields

uint32_t numSets
CUpti_EventGroupSetsets
+


Detailed Description

+A set of event group sets. When returned by cuptiEventGroupSetsCreate and cuptiMetricCreateEventGroupSets a CUpti_EventGroupSets indicates the number of passes required to collect all the events, and the event groups that should be collected during each pass.

Field Documentation

+ +
+
+ + + + +
uint32_t CUpti_EventGroupSets::numSets
+
+
+ +

+Number of event group sets. +

+

+ +

+ +
+ +

+An array of numSets event group sets. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__GetCubinCrcParams.html b/doc/Cupti/structCUpti__GetCubinCrcParams.html new file mode 100644 index 0000000000000000000000000000000000000000..7d6b52596bac48228911218fad9364fa808d73bc --- /dev/null +++ b/doc/Cupti/structCUpti__GetCubinCrcParams.html @@ -0,0 +1,108 @@ + + +Cupti: CUpti_GetCubinCrcParams Struct Reference + + + + + +
+

CUpti_GetCubinCrcParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiGetCubinCrc. + +

+ + + + + + + + + + + +

Data Fields

const void * cubin
uint64_t cubinCrc
size_t cubinSize
size_t size
+


Field Documentation

+ +
+
+ + + + +
const void* CUpti_GetCubinCrcParams::cubin
+
+
+ +

+[w] Pointer to cubin binary +

+

+ +

+
+ + + + +
uint64_t CUpti_GetCubinCrcParams::cubinCrc
+
+
+ +

+[r] Computed CRC will be stored in it. +

+

+ +

+ +
+ +

+[w] Size of cubin binary. +

+

+ +

+
+ + + + +
size_t CUpti_GetCubinCrcParams::size
+
+
+ +

+[w] Size of configuration structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__GetSassToSourceCorrelationParams.html b/doc/Cupti/structCUpti__GetSassToSourceCorrelationParams.html new file mode 100644 index 0000000000000000000000000000000000000000..44ed8108cdc2844ea0d4ba9ff57a7f04ee794f9a --- /dev/null +++ b/doc/Cupti/structCUpti__GetSassToSourceCorrelationParams.html @@ -0,0 +1,176 @@ + + +Cupti: CUpti_GetSassToSourceCorrelationParams Struct Reference + + + + + +
+

CUpti_GetSassToSourceCorrelationParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiGetSassToSourceCorrelation. + +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

const void * cubin
size_t cubinSize
char * dirName
char * fileName
const char * functionName
uint32_t lineNumber
uint64_t pcOffset
size_t size
+


Field Documentation

+ +
+ +
+ +

+[w] Pointer to cubin binary where function belongs. +

+

+ +

+ +
+ +

+[w] Size of cubin binary. +

+

+ +

+ +
+ +

+[r] Path for the directory of source file. +

+

+ +

+ +
+ +

+[r] Path for the source file. +

+

+ +

+ +
+ +

+[w] Function name to which PC belongs. +

+

+ +

+ +
+ +

+[r] Line number in the source code. +

+

+ +

+ +
+ +

+[w] PC offset +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_GetSassToSourceCorrelationParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__GraphData.html b/doc/Cupti/structCUpti__GraphData.html new file mode 100644 index 0000000000000000000000000000000000000000..b726327830be0410066dc74d91eacad85511a2f0 --- /dev/null +++ b/doc/Cupti/structCUpti__GraphData.html @@ -0,0 +1,180 @@ + + +Cupti: CUpti_GraphData Struct Reference + + + + + +
+

CUpti_GraphData Struct Reference
+ +[CUPTI Callback API] +

CUDA graphs data passed into a resource callback function. +More... +

+ + + + + + + + + + + + + + + + + +

Data Fields

CUgraphNode dependency
CUgraph graph
CUgraphExec graphExec
CUgraphNode node
CUgraphNodeType nodeType
CUgraph originalGraph
CUgraphNode originalNode
+


Detailed Description

+CUDA graphs data passed into a resource callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_RESOURCE. The graph data is valid only within the invocation of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data.

Field Documentation

+ +
+
+ + + + +
CUgraphNode CUpti_GraphData::dependency
+
+
+ +

+The dependent graph node The size of the array is

Parameters:
+ + +
numDependencies. 
+
+ +
+

+ +

+
+ + + + +
CUgraph CUpti_GraphData::graph
+
+
+ +

+CUDA graph +

+

+ +

+
+ + + + +
CUgraphExec CUpti_GraphData::graphExec
+
+
+ +

+CUDA executable graph +

+

+ +

+
+ + + + +
CUgraphNode CUpti_GraphData::node
+
+
+ +

+CUDA graph node +

+

+ +

+
+ + + + +
CUgraphNodeType CUpti_GraphData::nodeType
+
+
+ +

+Type of the

Parameters:
+ + +
node 
+
+ +
+

+ +

+
+ + + + +
CUgraph CUpti_GraphData::originalGraph
+
+
+ +

+The original CUDA graph from which

Parameters:
+ + +
graph is cloned
+
+ +
+

+ +

+
+ + + + +
CUgraphNode CUpti_GraphData::originalNode
+
+
+ +

+The original CUDA graph node from which

Parameters:
+ + +
node is cloned
+
+ +
+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ModuleResourceData.html b/doc/Cupti/structCUpti__ModuleResourceData.html new file mode 100644 index 0000000000000000000000000000000000000000..325bb90a8a8426cd15692c7309c822b951fb7a5e --- /dev/null +++ b/doc/Cupti/structCUpti__ModuleResourceData.html @@ -0,0 +1,92 @@ + + +Cupti: CUpti_ModuleResourceData Struct Reference + + + + + +
+

CUpti_ModuleResourceData Struct Reference
+ +[CUPTI Callback API] +

Module data passed into a resource callback function. +More... +

+ + + + + + + + + +

Data Fields

size_t cubinSize
uint32_t moduleId
const char * pCubin
+


Detailed Description

+CUDA module data passed into a resource callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_RESOURCE. The module data is valid only within the invocation of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data.

Field Documentation

+ +
+ +
+ +

+The size of the cubin. +

+

+ +

+ +
+ +

+Identifier to associate with the CUDA module. +

+

+ +

+
+ + + + +
const char* CUpti_ModuleResourceData::pCubin
+
+
+ +

+Pointer to the associated cubin. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__NvtxData.html b/doc/Cupti/structCUpti__NvtxData.html new file mode 100644 index 0000000000000000000000000000000000000000..1c2addad7b01b3a79eee64c85169db21a14e81a1 --- /dev/null +++ b/doc/Cupti/structCUpti__NvtxData.html @@ -0,0 +1,92 @@ + + +Cupti: CUpti_NvtxData Struct Reference + + + + + +
+

CUpti_NvtxData Struct Reference
+ +[CUPTI Callback API] +

Data passed into a NVTX callback function. +More... +

+ + + + + + + + + +

Data Fields

const char * functionName
const void * functionParams
const void * functionReturnValue
+


Detailed Description

+Data passed into a NVTX callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_NVTX. Unless otherwise noted, the callback data is valid only within the invocation of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data.

Field Documentation

+ +
+
+ + + + +
const char* CUpti_NvtxData::functionName
+
+
+ +

+Name of the NVTX API function which issued the callback. This string is a global constant and so may be accessed outside of the callback. +

+

+ +

+
+ + + + +
const void* CUpti_NvtxData::functionParams
+
+
+ +

+Pointer to the arguments passed to the NVTX API call. See generated_nvtx_meta.h for structure definitions for the parameters for each NVTX API function. +

+

+ +

+
+ + + + +
const void* CUpti_NvtxData::functionReturnValue
+
+
+ +

+Pointer to the return value of the NVTX API call. See nvToolsExt.h for each NVTX API function's return value. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingConfigurationInfo.html b/doc/Cupti/structCUpti__PCSamplingConfigurationInfo.html new file mode 100644 index 0000000000000000000000000000000000000000..7b2237be781f9f41a6a29938fe0f7f19f60743b3 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingConfigurationInfo.html @@ -0,0 +1,229 @@ + + +Cupti: CUpti_PCSamplingConfigurationInfo Struct Reference + + + + + +
+

CUpti_PCSamplingConfigurationInfo Struct Reference
+ +[CUPTI PC Sampling API] +

PC sampling configuration information structure. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

CUpti_PCSamplingConfigurationAttributeType attributeType
struct {
collectionModeData
struct {
enableStartStopControlData
struct {
hardwareBufferSizeData
struct {
invalidData
struct {
outputDataFormatData
struct {
samplingDataBufferData
struct {
samplingPeriodData
struct {
scratchBufferSizeData
struct {
stallReasonData
+


Detailed Description

+This structure provides CUpti_PCSamplingConfigurationAttributeType which can be configured or queried for PC sampling configuration

Field Documentation

+ +

+ +

+ +

+ +

+ +

+ +
+ +

+Invalid Value +

+

+ +

+ +

+ +

+ +

+ +

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingConfigurationInfoParams.html b/doc/Cupti/structCUpti__PCSamplingConfigurationInfoParams.html new file mode 100644 index 0000000000000000000000000000000000000000..46bf4bbd034a00c53aee3bf12c6637fb2a518565 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingConfigurationInfoParams.html @@ -0,0 +1,126 @@ + + +Cupti: CUpti_PCSamplingConfigurationInfoParams Struct Reference + + + + + +
+

CUpti_PCSamplingConfigurationInfoParams Struct Reference
+ +[CUPTI PC Sampling API] +

PC sampling configuration structure. +More... +

+ + + + + + + + + + + + + +

Data Fields

CUcontext ctx
size_t numAttributes
CUpti_PCSamplingConfigurationInfopPCSamplingConfigurationInfo
void * pPriv
size_t size
+


Detailed Description

+This structure configures PC sampling using cuptiPCSamplingSetConfigurationAttribute and queries PC sampling default configuration using cuptiPCSamplingGetConfigurationAttribute

Field Documentation

+ +
+ +
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[w] Number of attributes to configure using cuptiPCSamplingSetConfigurationAttribute or query using cuptiPCSamplingGetConfigurationAttribute +

+

+ +

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_PCSamplingConfigurationInfoParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingData.html b/doc/Cupti/structCUpti__PCSamplingData.html new file mode 100644 index 0000000000000000000000000000000000000000..9144029559a8c37de3f4abd4d3f3d657b0916853 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingData.html @@ -0,0 +1,194 @@ + + +Cupti: CUpti_PCSamplingData Struct Reference + + + + + +
+

CUpti_PCSamplingData Struct Reference
+ +[CUPTI PC Sampling API] +

Collected PC Sampling data. + +

+ + + + + + + + + + + + + + + + + + + + + + +

Data Fields

size_t collectNumPcs
uint64_t droppedSamples
uint64_t nonUsrKernelsTotalSamples
CUpti_PCSamplingPCDatapPcData
 collectNumPcs
uint64_t rangeId
size_t remainingNumPcs
size_t size
size_t totalNumPcs
uint64_t totalSamples
+


Field Documentation

+ +
+ +
+ +

+[w] Number of PCs to be collected +

+

+ +

+ +
+ +

+[r] Number of samples that were dropped by hardware due to backpressure/overflow. +

+

+ +

+ +
+ +

+[r] Number of samples collected across all non user kernels PCs. It includes samples for non-user kernels. It includes counts for all non selected stall reasons as well. CUPTI does not provide PC records for non-user kernels. +

+

+ +

+ +
+ +

+[r] Profiled PC data. This data struct should have enough memory to collect the number of PCs mentioned in collectNumPcs. +

+

+ +

+
+ + + + +
uint64_t CUpti_PCSamplingData::rangeId
+
+
+ +

+[r] Unique identifier for each range. Data collected across multiple ranges in multiple buffers can be identified using range id. +

+

+ +

+ +
+ +

+[r] Number of PCs available for collection +

+

+ +

+
+ + + + +
size_t CUpti_PCSamplingData::size
+
+
+ +

+[w] Size of the data structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+ +

+ +
+ +

+[r] Number of PCs collected +

+

+ +

+ +
+ +

+[r] Number of samples collected across all PCs. It includes samples for user modules, samples for non-user kernels and dropped samples. It includes counts for all non selected stall reasons. CUPTI does not provide PC records for non-user kernels. CUPTI does not provide PC records for instructions for which all selected stall reason metrics counts are zero. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingDisableParams.html b/doc/Cupti/structCUpti__PCSamplingDisableParams.html new file mode 100644 index 0000000000000000000000000000000000000000..49e51ba350e68beaf2a3779bbfa089cd5bfbab02 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingDisableParams.html @@ -0,0 +1,91 @@ + + +Cupti: CUpti_PCSamplingDisableParams Struct Reference + + + + + +
+

CUpti_PCSamplingDisableParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingDisable. + +

+ + + + + + + + + +

Data Fields

CUcontext ctx
void * pPriv
size_t size
+


Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_PCSamplingDisableParams::ctx
+
+
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingEnableParams.html b/doc/Cupti/structCUpti__PCSamplingEnableParams.html new file mode 100644 index 0000000000000000000000000000000000000000..7252789a866ddee1b7e80b588b2739454217ccf2 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingEnableParams.html @@ -0,0 +1,91 @@ + + +Cupti: CUpti_PCSamplingEnableParams Struct Reference + + + + + +
+

CUpti_PCSamplingEnableParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingEnable. + +

+ + + + + + + + + +

Data Fields

CUcontext ctx
void * pPriv
size_t size
+


Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_PCSamplingEnableParams::ctx
+
+
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_PCSamplingEnableParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingGetDataParams.html b/doc/Cupti/structCUpti__PCSamplingGetDataParams.html new file mode 100644 index 0000000000000000000000000000000000000000..50b9740153ef69de2c9bbdb3d0cfb901bdd1c60b --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingGetDataParams.html @@ -0,0 +1,113 @@ + + +Cupti: CUpti_PCSamplingGetDataParams Struct Reference + + + + + +
+

CUpti_PCSamplingGetDataParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingGetData. + +

+ + + + + + + + + + + +

Data Fields

CUcontext ctx
void * pcSamplingData
void * pPriv
size_t size
+


Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_PCSamplingGetDataParams::ctx
+
+
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+

Parameters:
+ + +
pcSamplingData Data buffer to hold collected PC Sampling data PARSED_DATA. Buffer type is void * which can point to PARSED_DATA. Refer to CUpti_PCSamplingData for buffer format for PARSED_DATA.
+
+ +
+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_PCSamplingGetDataParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingGetNumStallReasonsParams.html b/doc/Cupti/structCUpti__PCSamplingGetNumStallReasonsParams.html new file mode 100644 index 0000000000000000000000000000000000000000..0686f8c1c946c5c37083a62f4b0f2d16599f57f5 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingGetNumStallReasonsParams.html @@ -0,0 +1,108 @@ + + +Cupti: CUpti_PCSamplingGetNumStallReasonsParams Struct Reference + + + + + +
+

CUpti_PCSamplingGetNumStallReasonsParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingGetNumStallReasons. + +

+ + + + + + + + + + + +

Data Fields

CUcontext ctx
size_t * numStallReasons
void * pPriv
size_t size
+


Field Documentation

+ +
+ +
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[r] Number of stall reasons +

+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingGetStallReasonsParams.html b/doc/Cupti/structCUpti__PCSamplingGetStallReasonsParams.html new file mode 100644 index 0000000000000000000000000000000000000000..c05e9b97986a7d0bdf1be075a5554380951bb51e --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingGetStallReasonsParams.html @@ -0,0 +1,142 @@ + + +Cupti: CUpti_PCSamplingGetStallReasonsParams Struct Reference + + + + + +
+

CUpti_PCSamplingGetStallReasonsParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingGetStallReasons. + +

+ + + + + + + + + + + + + + + +

Data Fields

CUcontext ctx
size_t numStallReasons
void * pPriv
size_t size
uint32_t * stallReasonIndex
char ** stallReasons
+


Field Documentation

+ +
+ +
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[w] Number of stall reasons +

+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure i.e. CUpti_PCSamplingGetStallReasonsParamsSize. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+ +

+ +
+ +

+[r] Stall reason index +

+

+ +

+ +
+ +

+[r] Stall reasons name +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingPCData.html b/doc/Cupti/structCUpti__PCSamplingPCData.html new file mode 100644 index 0000000000000000000000000000000000000000..127f0b35d0d1ec12654a3922fc5c9f4f0d6ccd8c --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingPCData.html @@ -0,0 +1,176 @@ + + +Cupti: CUpti_PCSamplingPCData Struct Reference + + + + + +
+

CUpti_PCSamplingPCData Struct Reference
+ +[CUPTI PC Sampling API] +

PC Sampling data. + +

+ + + + + + + + + + + + + + + + + + + +

Data Fields

uint64_t cubinCrc
uint32_t functionIndex
char * functionName
uint32_t pad
uint64_t pcOffset
size_t size
CUpti_PCSamplingStallReason * stallReason
size_t stallReasonCount
+


Field Documentation

+ +
+
+ + + + +
uint64_t CUpti_PCSamplingPCData::cubinCrc
+
+
+ +

+[r] Unique cubin id +

+

+ +

+ +
+ +

+The function's unique symbol index in the module. +

+

+ +

+ +
+ +

+[r] The function name. This name string might be shared across all the records including records from activity APIs representing the same function, and so it should not be modified or freed until post processing of all the records is done. Once done, it is the user’s responsibility to free the memory using free() function. +

+

+ +

+
+ + + + +
uint32_t CUpti_PCSamplingPCData::pad
+
+
+ +

+Padding +

+

+ +

+
+ + + + +
uint64_t CUpti_PCSamplingPCData::pcOffset
+
+
+ +

+[r] PC offset +

+

+ +

+
+ + + + +
size_t CUpti_PCSamplingPCData::size
+
+
+ +

+[w] Size of the data structure. CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+ +

+ +
+ +

+[r] Stall reason id and total samples +

+

+ +

+ +
+ +

+[r] Collected stall reason count +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingStallReason.html b/doc/Cupti/structCUpti__PCSamplingStallReason.html new file mode 100644 index 0000000000000000000000000000000000000000..278bd7f0b4a9fc0afc8d02efac92e492e341d351 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingStallReason.html @@ -0,0 +1,74 @@ + + +Cupti: CUpti_PCSamplingStallReason Struct Reference + + + + + +
+

CUpti_PCSamplingStallReason Struct Reference
+ +[CUPTI PC Sampling API] +

PC Sampling stall reasons. + +

+ + + + + + + +

Data Fields

uint32_t pcSamplingStallReasonIndex
uint32_t samples
+


Field Documentation

+ +
+ +
+ +

+[r] Collected stall reason index +

+

+ +

+ +
+ +

+[r] Number of times the PC was sampled with the stallReason. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingStartParams.html b/doc/Cupti/structCUpti__PCSamplingStartParams.html new file mode 100644 index 0000000000000000000000000000000000000000..0514a26fdae8279eec3ce5d219f1dad0ce50eadf --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingStartParams.html @@ -0,0 +1,91 @@ + + +Cupti: CUpti_PCSamplingStartParams Struct Reference + + + + + +
+

CUpti_PCSamplingStartParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingStart. + +

+ + + + + + + + + +

Data Fields

CUcontext ctx
void * pPriv
size_t size
+


Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_PCSamplingStartParams::ctx
+
+
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure, i.e. CUpti_PCSamplingStartParamsSize. The CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__PCSamplingStopParams.html b/doc/Cupti/structCUpti__PCSamplingStopParams.html new file mode 100644 index 0000000000000000000000000000000000000000..d7577654be4b587bdd59fb515436d24812742597 --- /dev/null +++ b/doc/Cupti/structCUpti__PCSamplingStopParams.html @@ -0,0 +1,91 @@ + + +Cupti: CUpti_PCSamplingStopParams Struct Reference + + + + + +
+

CUpti_PCSamplingStopParams Struct Reference
+ +[CUPTI PC Sampling API] +

Params for cuptiPCSamplingStop. + +

+ + + + + + + + + +

Data Fields

CUcontext ctx
void * pPriv
size_t size
+


Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_PCSamplingStopParams::ctx
+
+
+ +

+[w] CUcontext +

+

+ +

+ +
+ +

+[w] Assign to NULL +

+

+ +

+ +
+ +

+[w] Size of the data structure, i.e. CUpti_PCSamplingStopParamsSize. The CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are available in the structure. Used to preserve backward compatibility. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__BeginPass__Params.html b/doc/Cupti/structCUpti__Profiler__BeginPass__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..53186737bccc4dc7062fb6ed1e69d8fa19b02f45 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__BeginPass__Params.html @@ -0,0 +1,51 @@ + + +Cupti: CUpti_Profiler_BeginPass_Params Struct Reference + + + + + +
+

CUpti_Profiler_BeginPass_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerBeginPass. + +

+ + + + + + + + + + + + +

Data Fields

+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__BeginSession__Params.html b/doc/Cupti/structCUpti__Profiler__BeginSession__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..f419c27dd32018a819d89ddac1fa4de2f58822b0 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__BeginSession__Params.html @@ -0,0 +1,91 @@ + + +Cupti: CUpti_Profiler_BeginSession_Params Struct Reference + + + + + +
+

CUpti_Profiler_BeginSession_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerBeginSession. + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

+uint8_t bDumpCounterDataInFile
 [in] [optional]
+size_t counterDataImageSize
 [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
+size_t counterDataScratchBufferSize
 [in] size calculated from cuptiProfilerCounterDataImageInitializeScratchBuffer
+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+size_t maxLaunchesPerPass
 [in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass.
+size_t maxRangesPerPass
 [in] Maximum number of ranges that can be recorded in a single pass.
+const char * pCounterDataFilePath
 [in] [optional]
+uint8_t * pCounterDataImage
 [in] address of CounterDataImage
+uint8_t * pCounterDataScratchBuffer
 [in] address of CounterDataImage scratch buffer
+void * pPriv
 [in] assign to NULL
+CUpti_ProfilerRange range
 [in] CUpti_ProfilerRange
+CUpti_ProfilerReplayMode replayMode
 [in] CUpti_ProfilerReplayMode
+size_t structSize
 [in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__CounterDataImageOptions.html b/doc/Cupti/structCUpti__Profiler__CounterDataImageOptions.html new file mode 100644 index 0000000000000000000000000000000000000000..167fc1185267289a1d0c9affa6a1c0b99580c19a --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__CounterDataImageOptions.html @@ -0,0 +1,81 @@ + + +Cupti: CUpti_Profiler_CounterDataImageOptions Struct Reference + + + + + +
+

CUpti_Profiler_CounterDataImageOptions Struct Reference
+ +[CUPTI Profiling API] +

Input parameter to define the counterDataImage. + +

+ + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

+size_t counterDataPrefixSize
 [in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
+uint32_t maxNumRanges
 [in] Maximum number of ranges that can be profiled
+uint32_t maxNumRangeTreeNodes
 [in] Maximum number of RangeTree nodes; must be >= maxNumRanges
+uint32_t maxRangeNameLength
 [in] Maximum string length of each RangeName, including the trailing NULL character
const uint8_t * pCounterDataPrefix
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
+


Field Documentation

+ +
+ +
+ +

+[in] Address of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8). +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__CounterDataImage__CalculateScratchBufferSize__Params.html b/doc/Cupti/structCUpti__Profiler__CounterDataImage__CalculateScratchBufferSize__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..da5a22dfd9b3772af7026f6e8927ed42462173da --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__CounterDataImage__CalculateScratchBufferSize__Params.html @@ -0,0 +1,59 @@ + + +Cupti: CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params Struct Reference + + + + + +
+

CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize. + +

+ + + + + + + + + + + + + + + + + + +

Data Fields

+size_t counterDataImageSize
 [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
+size_t counterDataScratchBufferSize
 [out]
+uint8_t * pCounterDataImage
 [in]
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__CounterDataImage__CalculateSize__Params.html b/doc/Cupti/structCUpti__Profiler__CounterDataImage__CalculateSize__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..ac218dd4b1a8c8b853e9df8b6aae2a9844dbacaa --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__CounterDataImage__CalculateSize__Params.html @@ -0,0 +1,60 @@ + + +Cupti: CUpti_Profiler_CounterDataImage_CalculateSize_Params Struct Reference + + + + + +
+

CUpti_Profiler_CounterDataImage_CalculateSize_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerCounterDataImageCalculateSize. + +

+ + + + + + + + + + + + + + + + + + +

Data Fields

+size_t counterDataImageSize
 [out]
+const
+CUpti_Profiler_CounterDataImageOptions
pOptions
 [in] Pointer to Counter Data Image Options
+void * pPriv
 [in] assign to NULL
+size_t sizeofCounterDataImageOptions
 [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
+size_t structSize
 [in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__CounterDataImage__InitializeScratchBuffer__Params.html b/doc/Cupti/structCUpti__Profiler__CounterDataImage__InitializeScratchBuffer__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..291d10d6f07dd06c8e85a93164a82c024e826638 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__CounterDataImage__InitializeScratchBuffer__Params.html @@ -0,0 +1,63 @@ + + +Cupti: CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params Struct Reference + + + + + +
+

CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerCounterDataImageInitializeScratchBuffer. + +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

+size_t counterDataImageSize
 [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
+size_t counterDataScratchBufferSize
 [in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize
+uint8_t * pCounterDataImage
 [in]
+uint8_t * pCounterDataScratchBuffer
 [in] the scratch buffer to be initialized.
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__CounterDataImage__Initialize__Params.html b/doc/Cupti/structCUpti__Profiler__CounterDataImage__Initialize__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..e92599972e72e0418ca0518c27d28fca6d2de14f --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__CounterDataImage__Initialize__Params.html @@ -0,0 +1,64 @@ + + +Cupti: CUpti_Profiler_CounterDataImage_Initialize_Params Struct Reference + + + + + +
+

CUpti_Profiler_CounterDataImage_Initialize_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerCounterDataImageInitialize. + +

+ + + + + + + + + + + + + + + + + + + + + +

Data Fields

+size_t counterDataImageSize
 [in] Size calculated from cuptiProfilerCounterDataImageCalculateSize
+uint8_t * pCounterDataImage
 [in] The buffer to be initialized.
+const
+CUpti_Profiler_CounterDataImageOptions
pOptions
 [in] Pointer to Counter Data Image Options
+void * pPriv
 [in] assign to NULL
+size_t sizeofCounterDataImageOptions
 [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
+size_t structSize
 [in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__DeInitialize__Params.html b/doc/Cupti/structCUpti__Profiler__DeInitialize__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..a7e8f3e39d25d268e567d548d438d34ba344c78e --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__DeInitialize__Params.html @@ -0,0 +1,47 @@ + + +Cupti: CUpti_Profiler_DeInitialize_Params Struct Reference + + + + + +
+

CUpti_Profiler_DeInitialize_Params Struct Reference
+ +[CUPTI Profiling API] +

Default parameter for cuptiProfilerDeInitialize. + +

+ + + + + + + + + +

Data Fields

+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__DeviceSupported__Params.html b/doc/Cupti/structCUpti__Profiler__DeviceSupported__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..4e0a15d268fa411959414a3b2ececf62aa2483b2 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__DeviceSupported__Params.html @@ -0,0 +1,75 @@ + + +Cupti: CUpti_Profiler_DeviceSupported_Params Struct Reference + + + + + +
+

CUpti_Profiler_DeviceSupported_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerDeviceSupported. + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

+CUpti_Profiler_Support_Level architecture
 [out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise
+CUpti_Profiler_Support_Level cmp
 [out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise
+CUpti_Profiler_Support_Level confidentialCompute
 [out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise
+CUdevice cuDevice
 [in] if NULL, the current CUcontext is used
+CUpti_Profiler_Support_Level isSupported
 [out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise.
+void * pPriv
 [in] assign to NULL
+CUpti_Profiler_Support_Level sli
 [out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise
+size_t structSize
 [in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE
+CUpti_Profiler_Support_Level vGpu
 [out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED otherwise
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__DisableProfiling__Params.html b/doc/Cupti/structCUpti__Profiler__DisableProfiling__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..9a04a7968ff6a42141a6b2b552c13f34f746407e --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__DisableProfiling__Params.html @@ -0,0 +1,51 @@ + + +Cupti: CUpti_Profiler_DisableProfiling_Params Struct Reference + + + + + +
+

CUpti_Profiler_DisableProfiling_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerDisableProfiling. + +

+ + + + + + + + + + + + +

Data Fields

+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__EnableProfiling__Params.html b/doc/Cupti/structCUpti__Profiler__EnableProfiling__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..44f0542ef07b4ddc09717a187d7f2f096b74e001 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__EnableProfiling__Params.html @@ -0,0 +1,51 @@ + + +Cupti: CUpti_Profiler_EnableProfiling_Params Struct Reference + + + + + +
+

CUpti_Profiler_EnableProfiling_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerEnableProfiling. + +

+ + + + + + + + + + + + +

Data Fields

+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__EndPass__Params.html b/doc/Cupti/structCUpti__Profiler__EndPass__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..7a765389a14cadfc89ed9b22ace71baa31c14d0a --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__EndPass__Params.html @@ -0,0 +1,74 @@ + + +Cupti: CUpti_Profiler_EndPass_Params Struct Reference + + + + + +
+

CUpti_Profiler_EndPass_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerEndPass. + +

+ + + + + + + + + + + + + + + + + + +

Data Fields

+uint8_t allPassesSubmitted
 [out] becomes true when the last pass has been queued to the GPU
+CUcontext ctx
 [in] if NULL, the current CUcontext is used
size_t passIndex
 [out] The targetNestingLevel that will be collected by the *next* BeginPass.
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE
+


Field Documentation

+ +
+ +
+ +

+[out] The passIndex that will be collected by the *next* BeginPass +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__EndSession__Params.html b/doc/Cupti/structCUpti__Profiler__EndSession__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..1eddc942527d56caaba6e19a9345dbc0e8fe3db1 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__EndSession__Params.html @@ -0,0 +1,51 @@ + + +Cupti: CUpti_Profiler_EndSession_Params Struct Reference + + + + + +
+

CUpti_Profiler_EndSession_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerEndSession. + +

+ + + + + + + + + + + + +

Data Fields

+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__FlushCounterData__Params.html b/doc/Cupti/structCUpti__Profiler__FlushCounterData__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..bbbf3eaf9c014427d961b7478f9370b9853dd847 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__FlushCounterData__Params.html @@ -0,0 +1,59 @@ + + +Cupti: CUpti_Profiler_FlushCounterData_Params Struct Reference + + + + + +
+

CUpti_Profiler_FlushCounterData_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerFlushCounterData. + +

+ + + + + + + + + + + + + + + + + + +

Data Fields

+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+size_t numRangesDropped
 [out] number of ranges whose data was dropped in the processed passes
+size_t numTraceBytesDropped
 [out] number of bytes not written to TraceBuffer due to buffer full
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__GetCounterAvailability__Params.html b/doc/Cupti/structCUpti__Profiler__GetCounterAvailability__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..3bd47d5bbe1c86c65284ddb9ee0289cbf02a8b67 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__GetCounterAvailability__Params.html @@ -0,0 +1,73 @@ + + +Cupti: CUpti_Profiler_GetCounterAvailability_Params Struct Reference + + + + + +
+

CUpti_Profiler_GetCounterAvailability_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerGetCounterAvailability. + +

+ + + + + + + + + + + + + + + + + +

Data Fields

size_t counterAvailabilityImageSize
+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+uint8_t * pCounterAvailabilityImage
 [in] buffer receiving counter availability image, may be NULL
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE
+


Field Documentation

+ +
+ +
+ +

+[in/out] If `pCounterAvailabilityImage` is NULL, then the required size is returned in `counterAvailabilityImageSize`; otherwise `counterAvailabilityImageSize` should be set to the size of `pCounterAvailabilityImage`, and on return it is overwritten with the actual number of bytes copied. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__Initialize__Params.html b/doc/Cupti/structCUpti__Profiler__Initialize__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..1543b4b92488f659c5b83f92c04d0253cd3e08ed --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__Initialize__Params.html @@ -0,0 +1,47 @@ + + +Cupti: CUpti_Profiler_Initialize_Params Struct Reference + + + + + +
+

CUpti_Profiler_Initialize_Params Struct Reference
+ +[CUPTI Profiling API] +

Default parameter for cuptiProfilerInitialize. + +

+ + + + + + + + + +

Data Fields

+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__IsPassCollected__Params.html b/doc/Cupti/structCUpti__Profiler__IsPassCollected__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..57f11c772e59549c039a27c903f2da9784754ac0 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__IsPassCollected__Params.html @@ -0,0 +1,67 @@ + + +Cupti: CUpti_Profiler_IsPassCollected_Params Struct Reference + + + + + +
+

CUpti_Profiler_IsPassCollected_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerIsPassCollected. + +

+ + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

+uint8_t allPassesCollected
 [out] becomes true when the last pass has been decoded
+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+size_t numRangesDropped
 [out] number of ranges whose data was dropped in the processed pass
+size_t numTraceBytesDropped
 [out] number of bytes not written to TraceBuffer due to buffer full
+uint8_t onePassCollected
 [out] true if a pass was successfully decoded
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__SetConfig__Params.html b/doc/Cupti/structCUpti__Profiler__SetConfig__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..8ddd6c3ff471ad4aca698349ea3860a28ea239b4 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__SetConfig__Params.html @@ -0,0 +1,75 @@ + + +Cupti: CUpti_Profiler_SetConfig_Params Struct Reference + + + + + +
+

CUpti_Profiler_SetConfig_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerSetConfig. + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

+size_t configSize
 [in] size of config
+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+uint16_t minNestingLevel
 [in] the lowest nesting level to be profiled; must be >= 1
+uint16_t numNestingLevels
 [in] the number of nesting levels to profile; must be >= 1
+size_t passIndex
 [in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay
+const uint8_t * pConfig
 [in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be align(8).
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE
+uint16_t targetNestingLevel
 [in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application replay
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__Profiler__UnsetConfig__Params.html b/doc/Cupti/structCUpti__Profiler__UnsetConfig__Params.html new file mode 100644 index 0000000000000000000000000000000000000000..591a74da46bf2123b72d78d0d2ade1e26f592425 --- /dev/null +++ b/doc/Cupti/structCUpti__Profiler__UnsetConfig__Params.html @@ -0,0 +1,51 @@ + + +Cupti: CUpti_Profiler_UnsetConfig_Params Struct Reference + + + + + +
+

CUpti_Profiler_UnsetConfig_Params Struct Reference
+ +[CUPTI Profiling API] +

Params for cuptiProfilerUnsetConfig. + +

+ + + + + + + + + + + + +

Data Fields

+CUcontext ctx
 [in] if NULL, the current CUcontext is used
+void * pPriv
 [in] assign to NULL
+size_t structSize
 [in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE
+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__ResourceData.html b/doc/Cupti/structCUpti__ResourceData.html new file mode 100644 index 0000000000000000000000000000000000000000..2b4c4c7c1ea61070d796a2a21156841801aaabe0 --- /dev/null +++ b/doc/Cupti/structCUpti__ResourceData.html @@ -0,0 +1,92 @@ + + +Cupti: CUpti_ResourceData Struct Reference + + + + + +
+

CUpti_ResourceData Struct Reference
+ +[CUPTI Callback API] +

Data passed into a resource callback function. +More... +

+ + + + + + + + + +

Data Fields

CUcontext context
void * resourceDescriptor
CUstream stream
+


Detailed Description

+Data passed into a resource callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_RESOURCE. The callback data is valid only within the invocation of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data.

Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_ResourceData::context
+
+
+ +

+For CUPTI_CBID_RESOURCE_CONTEXT_CREATED and CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, the context being created or destroyed. For CUPTI_CBID_RESOURCE_STREAM_CREATED and CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the context containing the stream being created or destroyed. +

+

+ +

+ +
+ +

+Reserved for future use. +

+

+ +

+
+ + + + +
CUstream CUpti_ResourceData::stream
+
+
+ +

+For CUPTI_CBID_RESOURCE_STREAM_CREATED and CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the stream being created or destroyed. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structCUpti__SynchronizeData.html b/doc/Cupti/structCUpti__SynchronizeData.html new file mode 100644 index 0000000000000000000000000000000000000000..00f595ce45044aba0848270a25a477875c446040 --- /dev/null +++ b/doc/Cupti/structCUpti__SynchronizeData.html @@ -0,0 +1,75 @@ + + +Cupti: CUpti_SynchronizeData Struct Reference + + + + + +
+

CUpti_SynchronizeData Struct Reference
+ +[CUPTI Callback API] +

Data passed into a synchronize callback function. +More... +

+ + + + + + + +

Data Fields

CUcontext context
CUstream stream
+


Detailed Description

+Data passed into a synchronize callback function as the cbdata argument to CUpti_CallbackFunc. The cbdata will be this type for domain equal to CUPTI_CB_DOMAIN_SYNCHRONIZE. The callback data is valid only within the invocation of the callback function that is passed the data. If you need to retain some data for use outside of the callback, you must make a copy of that data.

Field Documentation

+ +
+
+ + + + +
CUcontext CUpti_SynchronizeData::context
+
+
+ +

+The context of the stream being synchronized. +

+

+ +

+
+ + + + +
CUstream CUpti_SynchronizeData::stream
+
+
+ +

+The stream being synchronized. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structHeader.html b/doc/Cupti/structHeader.html new file mode 100644 index 0000000000000000000000000000000000000000..a066a5a686416d0d211b56ba683312f8316dfff0 --- /dev/null +++ b/doc/Cupti/structHeader.html @@ -0,0 +1,74 @@ + + +Cupti: Header Struct Reference + + + + + +
+

Header Struct Reference
+ +[CUPTI PC Sampling Utility API] +

Header info will be stored in file. + +

+ + + + + + + +

Data Fields

uint32_t totalBuffers
uint32_t version
+


Field Documentation

+ +
+
+ + + + +
uint32_t Header::totalBuffers
+
+
+ +

+Total number of buffers present in the file. +

+

+ +

+
+ + + + +
uint32_t Header::version
+
+
+ +

+Version of file format. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structNV_1_1Cupti_1_1Checkpoint_1_1CUpti__Checkpoint.html b/doc/Cupti/structNV_1_1Cupti_1_1Checkpoint_1_1CUpti__Checkpoint.html new file mode 100644 index 0000000000000000000000000000000000000000..d37ec92babfd05fd879ba6335aab2483e16d42dc --- /dev/null +++ b/doc/Cupti/structNV_1_1Cupti_1_1Checkpoint_1_1CUpti__Checkpoint.html @@ -0,0 +1,71 @@ + + +Cupti: NV::Cupti::Checkpoint::CUpti_Checkpoint Struct Reference + + + + + +
+

NV::Cupti::Checkpoint::CUpti_Checkpoint Struct Reference
+ +[CUPTI Checkpoint API] +

Configuration and handle for a CUPTI Checkpoint. +More... +

+ + + + + + + + + + + + + + + + + + + + + + + + +

Data Fields

+uint8_t allowOverwrite
 [in] Boolean, Allow checkpoint to save over existing checkpoint
+CUcontext ctx
 [in] Set to context to save from, or will use current context if NULL
+uint8_t optimizations
 [in] Mask of CUpti_CheckpointOptimizations flags for this checkpoint
+void * pPriv
 [in] Assign to NULL
+size_t reserveDeviceMB
 [in] Restrict checkpoint from using last N MB of device memory (-1 = use no device memory)
+size_t reserveHostMB
 [in] Restrict checkpoint from using last N MB of host memory (-1 = use no host memory)
+size_t structSize
 [in] Must be set to CUpti_Checkpoint_STRUCT_SIZE
+


Detailed Description

+A CUpti_Checkpoint object should be initialized with desired options prior to passing into any CUPTI Checkpoint API function. The first call into a Checkpoint API function will initialize internal state based on these options. Subsequent changes to these options will not have any effect.

+Checkpoint data is saved in device, host, and filesystem space. There are options to reserve memory at each level (device, host, filesystem) which are intended to allow a guarantee that a certain amount of memory will remain free for use after the checkpoint is saved. Note, however, that falling back to slower levels of memory (host, and then filesystem) to save the checkpoint will result in performance degradation. Currently, the filesystem limitation is not implemented. Note that falling back to filesystem storage may significantly impact the performance for saving and restoring a checkpoint.

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/structPcSamplingStallReasons.html b/doc/Cupti/structPcSamplingStallReasons.html new file mode 100644 index 0000000000000000000000000000000000000000..f2c5172f47e260a752891a10855bef559f1113cb --- /dev/null +++ b/doc/Cupti/structPcSamplingStallReasons.html @@ -0,0 +1,91 @@ + + +Cupti: PcSamplingStallReasons Struct Reference + + + + + +
+

PcSamplingStallReasons Struct Reference
+ +[CUPTI PC Sampling Utility API] +

All available stall reason names and their respective indexes will be stored in it. + +

+ + + + + + + + + +

Data Fields

size_t numStallReasons
uint32_t * stallReasonIndex
char ** stallReasons
+


Field Documentation

+ +
+ +
+ +

+Number of all available stall reasons +

+

+ +

+ +
+ +

+Indexes of all available stall reasons +

+

+ +

+ +
+ +

+Names of all available stall reasons +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/tab_b.gif b/doc/Cupti/tab_b.gif new file mode 100644 index 0000000000000000000000000000000000000000..0d623483ffdf5f9f96900108042a7ab0643fe2a3 Binary files /dev/null and b/doc/Cupti/tab_b.gif differ diff --git a/doc/Cupti/tab_l.gif b/doc/Cupti/tab_l.gif new file mode 100644 index 0000000000000000000000000000000000000000..9b1e6337c9299a700401a2a78a2c6ffced475216 Binary files /dev/null and b/doc/Cupti/tab_l.gif differ diff --git a/doc/Cupti/tab_r.gif b/doc/Cupti/tab_r.gif new file mode 100644 index 0000000000000000000000000000000000000000..ce9dd9f533cb5486d6941844f442b59d4a9e9175 Binary files /dev/null and b/doc/Cupti/tab_r.gif differ diff --git a/doc/Cupti/tabs.css b/doc/Cupti/tabs.css new file mode 100644 index 0000000000000000000000000000000000000000..ab02c624afffb3c72a51ea259104cfeeeb7498c4 --- /dev/null +++ b/doc/Cupti/tabs.css @@ -0,0 +1,105 @@ +/* tabs styles, based on http://www.alistapart.com/articles/slidingdoors */ + +DIV.tabs +{ + float : left; + width : 100%; + background : url("tab_b.gif") repeat-x bottom; + margin-bottom : 4px; +} + +DIV.tabs UL +{ + margin : 0px; + padding-left : 10px; + list-style : none; +} + +DIV.tabs LI, DIV.tabs FORM +{ + display : inline; + margin : 0px; + padding : 0px; +} + +DIV.tabs FORM +{ + float : right; +} + +DIV.tabs A +{ + float : left; + background : url("tab_r.gif") no-repeat right top; + border-bottom : 1px solid #84B0C7; + font-size : 80%; + font-weight : bold; + text-decoration : none; +} + +DIV.tabs A:hover +{ + background-position: 100% -150px; +} + +DIV.tabs A:link, DIV.tabs A:visited, +DIV.tabs A:active, DIV.tabs A:hover +{ + color: #1A419D; +} + +DIV.tabs SPAN +{ + float : left; + display : block; + background : url("tab_l.gif") no-repeat left top; + padding : 5px 9px; + white-space : nowrap; +} + +DIV.tabs INPUT +{ + float : right; + display : inline; + font-size : 1em; +} + +DIV.tabs TD +{ + font-size : 80%; + font-weight : bold; + text-decoration : none; +} + + + +/* Commented 
Backslash Hack hides rule from IE5-Mac \*/ +DIV.tabs SPAN {float : none;} +/* End IE5-Mac hack */ + +DIV.tabs A:hover SPAN +{ + background-position: 0% -150px; +} + +DIV.tabs LI.current A +{ + background-position: 100% -150px; + border-width : 0px; +} + +DIV.tabs LI.current SPAN +{ + background-position: 0% -150px; + padding-bottom : 6px; +} + +DIV.navpath +{ + background : none; + border : none; + border-bottom : 1px solid #84B0C7; + text-align : center; + margin : 2px; + padding : 2px; +} diff --git a/doc/Cupti/unionCUpti__ActivityObjectKindId.html b/doc/Cupti/unionCUpti__ActivityObjectKindId.html new file mode 100644 index 0000000000000000000000000000000000000000..ebefb43be2009379bc7c7b8ff0bda2f86ca1a238 --- /dev/null +++ b/doc/Cupti/unionCUpti__ActivityObjectKindId.html @@ -0,0 +1,80 @@ + + +Cupti: CUpti_ActivityObjectKindId Union Reference + + + + + +
+

CUpti_ActivityObjectKindId Union Reference
+ +[CUPTI Activity API] +

Identifiers for object kinds as specified by CUpti_ActivityObjectKind. +More... +

+ + + + + + + + + + + +

Data Fields

struct {
dcs
struct {
pt
+


Detailed Description

+
See also:
CUpti_ActivityObjectKind
+

Field Documentation

+ +
+
+ + + + +
struct { ... } CUpti_ActivityObjectKindId::dcs
+
+
+ +

+A device object requires that we identify the device ID. A context object requires that we identify both the device and context ID. A stream object requires that we identify device, context, and stream ID. +

+

+ +

+
+ + + + +
struct { ... } CUpti_ActivityObjectKindId::pt
+
+
+ +

+A process object requires that we identify the process ID. A thread object requires that we identify both the process and thread ID. +

+

+

+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/Cupti/unionCUpti__MetricValue.html b/doc/Cupti/unionCUpti__MetricValue.html new file mode 100644 index 0000000000000000000000000000000000000000..675fc117d127658159f5d7cde32e1f395b0249b1 --- /dev/null +++ b/doc/Cupti/unionCUpti__MetricValue.html @@ -0,0 +1,39 @@ + + +Cupti: CUpti_MetricValue Union Reference + + + + + +
+

CUpti_MetricValue Union Reference
+ +[CUPTI Metric API] +

A metric value. +More... +

+ + +
+


Detailed Description

+Metric values can be one of several different kinds. Corresponding to each kind is a member of the CUpti_MetricValue union. The metric value returned by cuptiMetricGetValue should be accessed using the appropriate member of that union based on its value kind.
+
Generated on Fri Aug 26 17:18:25 2022 for Cupti by  + +doxygen 1.5.8
+ + diff --git a/doc/common/formatting/bg-head.png b/doc/common/formatting/bg-head.png new file mode 100644 index 0000000000000000000000000000000000000000..7757e5f3ef0173c4ea7d0234f64da1796997c68e Binary files /dev/null and b/doc/common/formatting/bg-head.png differ diff --git a/doc/common/formatting/bg-horiz.png b/doc/common/formatting/bg-horiz.png new file mode 100644 index 0000000000000000000000000000000000000000..05595c4666360100aeafcd0f92adb6c392d22ed7 Binary files /dev/null and b/doc/common/formatting/bg-horiz.png differ diff --git a/doc/common/formatting/bg-left.png b/doc/common/formatting/bg-left.png new file mode 100644 index 0000000000000000000000000000000000000000..26383b062ae7cb5d098d5b64eb814ba37e342057 Binary files /dev/null and b/doc/common/formatting/bg-left.png differ diff --git a/doc/common/formatting/bg-right.png b/doc/common/formatting/bg-right.png new file mode 100644 index 0000000000000000000000000000000000000000..82b1cfdc3cc49db0476f86c2059025672d8828f7 Binary files /dev/null and b/doc/common/formatting/bg-right.png differ diff --git a/doc/common/formatting/bg-sidehead-glow.png b/doc/common/formatting/bg-sidehead-glow.png new file mode 100644 index 0000000000000000000000000000000000000000..33c4f815402ac0b5665bb54f866e0f76189831fd Binary files /dev/null and b/doc/common/formatting/bg-sidehead-glow.png differ diff --git a/doc/common/formatting/bg-sidehead.png b/doc/common/formatting/bg-sidehead.png new file mode 100644 index 0000000000000000000000000000000000000000..a782b9136f11d7dd15471f0ded92f6aec982e0b9 Binary files /dev/null and b/doc/common/formatting/bg-sidehead.png differ diff --git a/doc/common/formatting/bg-vert.png b/doc/common/formatting/bg-vert.png new file mode 100644 index 0000000000000000000000000000000000000000..a25d0b2f5e7370cae6404a4115f6fa1138d4b1fd Binary files /dev/null and b/doc/common/formatting/bg-vert.png differ diff --git a/doc/common/formatting/common.min.js b/doc/common/formatting/common.min.js new file mode 100644 
index 0000000000000000000000000000000000000000..ec79acb115262679a7844c5a87b406bc30010dcc --- /dev/null +++ b/doc/common/formatting/common.min.js @@ -0,0 +1 @@ +var $body=$(document.body),$search=$("#search input"),$reset=$('#search button[type="reset"]'),$contents_container=$("#contents-container"),$content=$("#contents"),$sections=$content.find(".topic"),$results=$("#search-results"),$resultOL=$results.find("ol"),$sitenav=$("#site-nav"),$resizenav=$("#resize-nav"),sitenav_default_width=parseInt($sitenav.css("width")),sitenav_min_width=100,sitenav_width,$cur_page_result=$('
  • Current Document
  • '),$other_page_results=$('
  • Other Documents
  • ');var max_highlight=20,hl_prefix='',hl_postfix="";var page_title=document.getElementsByTagName("title")[0].innerHTML;var number_in_top_5=0;var unhighlight=(function(){var a=new RegExp(hl_prefix,"g");var b=new RegExp(hl_postfix,"g");var c=document.getElementById("contents");return function(e){var d=c.innerHTML;d=d.replace(a,"");c.innerHTML=d.replace(b,"")}})();var ajax_semaphore=(function(c){var b=0;function a(){b=c}a.prototype.acquire_resource=function(){if(this._available_resource>=0){--this._available_resource;return true}return false};a.prototype.release_resource=function(){if(this._available_resource=0||b.indexOf(g.title)>=0)){if($other_page_results.parent().length>0){var h=$('
  • '+g.title+"

  • ").insertBefore($other_page_results)}else{var h=$('
  • '+g.title+"

  • ");$resultOL.prepend($cur_page_result,h,$other_page_results)}++number_in_top_5}else{var h=$('
  • '+g.title+"

  • ").appendTo($resultOL)}$("#num-in-top",$cur_page_result).text(number_in_top_5>=5?"(top 5)":"");if(g.text){var e=htmlEscape(g.text);if(g.bold){var i=new RegExp(g.bold,"gi");e=e.replace(i,"$&")}h.append('

    Loading...

    ')}if(f||e=="..."||e=="Loading..."||e==""){var a=new XMLHttpRequest();a.onreadystatechange=function(){if(a.readyState==4){var o=new RegExp(g.bold,"gi");var l=$(a.responseText);var n=$("article > div.topic",l);var k=n.text();k=k.replace(/<[^<>]*>/gi,"");k=k.replace(/(\s)\s*/g," ");var j=k.search(o);if(j<0){j=k.search(c)}var p=k.lastIndexOf(" ",j-10);var m=k.indexOf(" ",j+15);k=k.substring(p,m);if(j>=0){k=k.replace(o,"$&");k=k.replace(c,"$&")}if(j>0){k="..."+k}k=k+"...";$(".shortDescription",h).html(k)}};a.open("GET",g.href,true);a.send()}return h}function syncTOC(){var f=$contents_container[0].scrollTop,e=$("#site-nav .current"),c=e.find("a").attr("href"),h=Number.NEGATIVE_INFINITY,g;$sections.each(function(){var i=this.offsetTop-f-1;if(i<=0&&i>h){h=i;g=this}});if(!g){g=$sections.get(0)}if(!g){return}var d="#"+g.id;if(c!=d){e.removeClass("current");var b=$('a[href="'+d+'"]',$sitenav);if(!b.length){var a=location.pathname.match(/[^\/]+$/)[0];b=$('a[href="'+a+d+'"]')}b=b.closest("li:visible");if(!b.length){return}b.addClass("current");toggleNavSectionByName(d,true);b.scrollintoview({duration:0})}}function toggleNavSectionByHandle(d,a){var b=d.children(".section-link").first();if(!b.length){return}var c=b.next("ul");var a=a!==undefined?a:!c.is(":visible");c.toggle(a);b.children(".twiddle").html(a?"▽":"▷");if(a){toggleNavSectionByHandle(d.parent().parent(),a)}}function toggleNavSectionByName(b,a){var c=$('a[href$="'+b+'"]',$sitenav).parent().parent();toggleNavSectionByHandle(c,a)}function allowNavSectionCollapsing(){$(".section-link").parents().has("ul:parent").each(function(){var b=$(this);var a=b.children(".section-link").first();$('').prependTo(a).click(function(){toggleNavSectionByHandle(b)});a.children("a").click(function(){var e=a.next("ul");var c=$(this).attr("href");var d=location.href.match(/[^\/]+$/)[0];toggleNavSectionByHandle(b,(location.hash!=c&&d!=c)||!e.is(":visible"));if(d==c){return false}})})}function 
toggleNavSectionAll(a){$(".section-link").parents().has("ul").each(function(){toggleNavSectionByHandle($(this),a)})}function syncNavSectionCollapse(){var a=location.href.match(/([^\/]+)\.html$/);if(!location.hash&&(typeof a==="null"||!a[1])){return}var b=location.hash?location.hash:"#"+a[1];toggleNavSectionAll(false);toggleNavSectionByName(b,true)}function setSitenavWidth(a){a=Math.min($(document).width()/2,Math.max(0,a));if(a').appendTo($resizenav).mousedown(function(a){if($sitenav.width()>0){setSitenavWidth(0)}else{setSitenavWidth(getSitenavWidth()>0?getSitenavWidth():sitenav_default_width)}})}function addEnoughPaddingAfterContent(){var b=$("#contents-end").css("margin",0),a=$(".topic:last");if($("dl.landing-page").length){return}$(window).on("resize",c);c();function c(){b.css("margin-top",Math.max($contents_container[0].offsetHeight-a.height()-15,0))}}function scrollIntoView(b){if((typeof b==="string")||(b=location.hash.substr(1))){var a=$sections.find('.topic[id="'+b+'"]').offsetRelativeTo($content);if(a){$content.scrollTop(a.top)}}else{$content.scrollTop(0)}}RegExp.escape=function(a){return a.replace("/[-[]{}()*+?.,\\^$|#s]/g","\\$&")};function htmlEscape(a){return a.replace(/&/g,"&").replace(//g,">")}function canonical(a){var b=/([^\/#]+)(?:#[^#\/]*)?$/.exec(a);b=b&&b[1];return b}function atRoot(){return canonical(location.href)=="docs.html"}function relative(a){return atRoot()?a:"../../"+a}jQuery.fn.offsetRelativeTo=function(b){var a=$(b),d=this.offset(),c=a.offset();if(d){d.top-=c.top-a.scrollTop();d.left-=c.left-a.scrollLeft()}return d};function setCookie(a,b){document.cookie=a+"="+b+"; path=/"}function readCookie(a){for(var c=document.cookie.split(/;\s*/),b=c.length;b--;){var d=c[b].split("=");if(d[0]==a){return d[1]}}}function supportsMathML(){var a=false;if(document.createElementNS){var b="http://www.w3.org/1998/Math/MathML";var d=document.createElement("div");d.style.position="absolute";var 
c=d.appendChild(document.createElementNS(b,"math")).appendChild(document.createElementNS(b,"mfrac"));c.appendChild(document.createElementNS(b,"mi")).appendChild(document.createTextNode("xx"));c.appendChild(document.createElementNS(b,"mi")).appendChild(document.createTextNode("yy"));document.body.appendChild(d);a=d.offsetHeight>d.offsetWidth;document.body.removeChild(d)}return a}function highlight(f){var c=false;var b;var g=document.getElementById("contents").children;var e=new RegExp("(?:>[^<]*)("+f+")(?:[^>]*<)","gi");var a=function(h,i){var j=h.replace(i,hl_prefix+i+hl_postfix);return j};for(b=0;b<3&&b')}}); \ No newline at end of file diff --git a/doc/common/formatting/commonltr.css b/doc/common/formatting/commonltr.css new file mode 100644 index 0000000000000000000000000000000000000000..845b9006a9b5097b9bd2d49fdcb20ca85010f32a --- /dev/null +++ b/doc/common/formatting/commonltr.css @@ -0,0 +1,136 @@ +/* + | This file is part of the DITA Open Toolkit project hosted on + | Sourceforge.net. See the accompanying license.txt file for + | applicable licenses. +*/ + +/* + | (c) Copyright IBM Corp. 2004, 2005 All Rights Reserved. 
+ */ + +.unresolved { background-color: skyblue; } +.noTemplate { background-color: yellow; } + +.base { background-color: #ffffff; } + +/* Add space for top level topics */ +.nested0 { margin-top : 1em;} + +/* div with class=p is used for paragraphs that contain blocks, to keep the XHTML valid */ +.p {margin-top: 1em} + +/* Default of italics to set apart figure captions */ +.figcap { font-style: italic } +.figdesc { font-style: normal } + +/* Use @frame to create frames on figures */ +.figborder { border-style: solid; padding-left : 3px; border-width : 2px; padding-right : 3px; margin-top: 1em; border-color : Silver;} +.figsides { border-left : 2px solid; padding-left : 3px; border-right : 2px solid; padding-right : 3px; margin-top: 1em; border-color : Silver;} +.figtop { border-top : 2px solid; margin-top: 1em; border-color : Silver;} +.figbottom { border-bottom : 2px solid; border-color : Silver;} +.figtopbot { border-top : 2px solid; border-bottom : 2px solid; margin-top: 1em; border-color : Silver;} + +/* Most link groups are created with
    . Ensure they have space before and after. */ +.ullinks { list-style-type: none } +.ulchildlink { margin-top: 1em; margin-bottom: 1em } +.olchildlink { margin-top: 1em; margin-bottom: 1em } +.linklist { margin-bottom: 1em } +.linklistwithchild { margin-left: 1.5em; margin-bottom: 1em } +.sublinklist { margin-left: 1.5em; margin-bottom: 1em } +.relconcepts { margin-top: 1em; margin-bottom: 1em } +.reltasks { margin-top: 1em; margin-bottom: 1em } +.relref { margin-top: 1em; margin-bottom: 1em } +.relinfo { margin-top: 1em; margin-bottom: 1em } +.breadcrumb { font-size : smaller; margin-bottom: 1em } +dt.prereq { margin-left : 20px;} + +/* Set heading sizes, getting smaller for deeper nesting */ +.topictitle1 { margin-top: 0pc; margin-bottom: .1em; font-size: 1.34em; } +.topictitle2 { margin-top: 1pc; margin-bottom: .45em; font-size: 1.17em; } +.topictitle3 { margin-top: 1pc; margin-bottom: .17em; font-size: 1.17em; font-weight: bold; } +.topictitle4 { margin-top: .83em; font-size: 1.17em; font-weight: bold; } +.topictitle5 { font-size: 1.17em; font-weight: bold; } +.topictitle6 { font-size: 1.17em; font-style: italic; } +.sectiontitle { margin-top: 1em; margin-bottom: 0em; color: black; font-size: 1.17em; font-weight: bold;} +.section { margin-top: 1em; margin-bottom: 1em } +.example { margin-top: 1em; margin-bottom: 1em } +div.tasklabel { margin-top: 1em; margin-bottom: 1em; } +h2.tasklabel, h3.tasklabel, h4.tasklabel, h5.tasklabel, h6.tasklabel { font-size: 100%; } + +/* All note formats have the same default presentation */ +.note { margin-top: 1em; margin-bottom : 1em;} +.notetitle { font-weight: bold } +.notelisttitle { font-weight: bold } +.tip { margin-top: 1em; margin-bottom : 1em;} +.tiptitle { font-weight: bold } +.fastpath { margin-top: 1em; margin-bottom : 1em;} +.fastpathtitle { font-weight: bold } +.important { margin-top: 1em; margin-bottom : 1em;} +.importanttitle { font-weight: bold } +.remember { margin-top: 1em; margin-bottom : 1em;} 
+.remembertitle { font-weight: bold } +.restriction { margin-top: 1em; margin-bottom : 1em;} +.restrictiontitle { font-weight: bold } +.attention { margin-top: 1em; margin-bottom : 1em;} +.attentiontitle { font-weight: bold } +.dangertitle { font-weight: bold } +.danger { margin-top: 1em; margin-bottom : 1em;} +.cautiontitle { font-weight: bold } +.caution { font-weight: bold; margin-bottom : 1em; } +.warning { margin-top: 1em; margin-bottom : 1em;} +.warningtitle { font-weight: bold } + +/* Simple lists do not get a bullet */ +ul.simple { list-style-type: none } + +/* Used on the first column of a table, when rowheader="firstcol" is used */ +.firstcol { font-weight : bold;} + +/* Various basic phrase styles */ +.bold { font-weight: bold; } +.boldItalic { font-weight: bold; font-style: italic; } +.italic { font-style: italic; } +.underlined { text-decoration: underline; } +.uicontrol { font-weight: bold; } +.parmname { font-weight: bold; } +.kwd { font-weight: bold; } +.defkwd { font-weight: bold; text-decoration: underline; } +.var { font-style : italic;} +.shortcut { text-decoration: underline; } + +/* Default of bold for definition list terms */ +.dlterm { font-weight: bold; } + +/* Use CSS to expand lists with @compact="no" */ +.dltermexpand { font-weight: bold; margin-top: 1em; } +*[compact="yes"]>li { margin-top: 0em;} +*[compact="no"]>li { margin-top: .53em;} +.liexpand { margin-top: 1em; margin-bottom: 1em } +.sliexpand { margin-top: 1em; margin-bottom: 1em } +.dlexpand { margin-top: 1em; margin-bottom: 1em } +.ddexpand { margin-top: 1em; margin-bottom: 1em } +.stepexpand { margin-top: 1em; margin-bottom: 1em } +.substepexpand { margin-top: 1em; margin-bottom: 1em } + +/* Align images based on @align on topic/image */ +div.imageleft { text-align: left } +div.imagecenter { text-align: center } +div.imageright { text-align: right } +div.imagejustify { text-align: justify } + +/* The cell border can be turned on with + {border-right:solid} + This value creates 
a very thick border in Firefox (does not match other tables) + + Firefox works with + {border-right:solid 1pt} + but this causes a barely visible line in IE */ +.cellrowborder { border-left:none; border-top:none; border-right:solid 1px; border-bottom:solid 1px } +.row-nocellborder { border-left:none; border-right:none; border-top:none; border-right: hidden; border-bottom:solid 1px} +.cell-norowborder { border-top:none; border-bottom:none; border-left:none; border-bottom: hidden; border-right:solid 1px} +.nocellnorowborder { border:none; border-right: hidden;border-bottom: hidden } +.table { margin-bottom: 30px } + +pre.screen { padding: 5px 5px 5px 5px; border: outset; background-color: #CCCCCC; margin-top: 2px; margin-bottom : 2px; white-space: pre} + +span.filepath { font-family:monospace } \ No newline at end of file diff --git a/doc/common/formatting/cppapiref.css b/doc/common/formatting/cppapiref.css new file mode 100644 index 0000000000000000000000000000000000000000..5619f9bf994e7b60a81f8a09cd728bc01a2c911e --- /dev/null +++ b/doc/common/formatting/cppapiref.css @@ -0,0 +1,425 @@ +/****************************************************************************** + * Copyright 1986-2011 by mental images GmbH, Fasanenstr. 81, D-10623 Berlin, + * Germany. All rights reserved. 
+ ******************************************************************************/ +tbody.cppapiref { color: #231f20; font-size: 11px; line-height: 1.7em; } + +caption { font-weight: bold } + +A.el { text-decoration: none; font-weight: bold } +A.elRef { font-weight: bold ; text-decoration: none; } +A.code:link { text-decoration: none; font-weight: normal;} +A.code:visited { text-decoration: none; font-weight: normal;} +A.codeRef:link { font-weight: normal; text-decoration: none; } +code { padding: 0 0.1em; } +A.codeRef:visited { font-weight: normal; text-decoration: none; } +DL.el { margin-left: -1cm } + +SPAN.keyword { color: #008000 } +SPAN.keywordtype { color: #604020 } +SPAN.keywordflow { color: #e08000 } +SPAN.comment { color: #007698 } +SPAN.preprocessor { color: #806020 } +SPAN.stringliteral { color: #002080 } +SPAN.charliteral { color: #008080 } + +.dirtab { padding: 4px; + border-collapse: collapse; + border: 1px solid #84b0c7; +} +TH.dirtab { background: #e8eef2; + font-weight: bold; +} +HR { height: 1px; border: none; border-top: 1px solid black; } +.miFooter { color: #717073; font-size: 0.9em; font-style: normal; text-decoration: none; text-align: left; height: 600px; } +H2.miCopyright { text-align: left; line-height: 2em; } +DIV.miCopyright { } +SUP.miCopyrightTM { font-size: 0.7em !important; } +SUP.miCopyright { font-size: 0.9em; padding-left: 0.2em; } + +th.cppapiref { + color: #5f604b; + text-align: left; +} + +th.cppapiref, td.cppapiref { + border: 0px; + vertical-align: top; +} + +table.spec td.cppapiref { + vertical-align: top; +} +table.cppapiref { border-top: 1px solid #d8d9da; border-right-width: 0; border-bottom-width: 0; border-left-width: 0; } +table.spec td.pos { + text-align: center; +} + +table.spec td.tp, +span.tp { font-family: monospace, fixed, courier; font-style: italic; text-align: center; } + +table.spec table.bits { + margin:0px; + border:0px; +} + +th.cppapiref, table.spec table.bits td.cppapiref { + border-bottom: 1px solid 
#ddd; +} + +table.spec table.bits { + margin: 0px; +} + +table.spec table.bits th.cppapiref, +table.spec table.bits td.cppapiref { + margin: 0px; + padding: 0px; +} + +table.spec th.cppapiref { + +} + +table.spec table.bits th.cppapiref { + color: #000; + text-align: left; +} + +div.comment { + font-style: italic; + color: #777; +} + +dd table.cppapiref em { + font-family: monospace, fixed, courier; + font-style: normal; +} + +table.extends { + border-style: none; + border-spacing: 0 0; + vertical-align: top; + margin: 0px 0px; + padding: 0px; + padding-top: 8pt; + border: 0px; +} + +table.extends tr.cppapiref { + vertical-align: top; +} + +table.extends td.cppapiref, table.extends th.cppapiref { + padding: 0pt; + padding-right: 16pt; + padding-bottom: 8pt; +} + +table.extends th.cppapiref { + color: #717073; + border: 0; +} + +table.extends ul { + padding-left: 16pt; + margin-left: 0pt; +} + +/* Style for detailed member documentation */ +.memtemplate { + color: #606060; + font-weight: normal; +} +.memnav { + /*background-color: #e8eef2;*/ + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} +.memitem { + padding: 1px; + /*background-color: #eef3f5;*/ +} +.memname { + white-space: nowrap; + font-weight: bold; +} +.memdoc{ padding-left: 20px; } +.memproto { + /*background-color: #eef3f5;*/ + background-color: #efeff0; border-top: 1px solid #d8dcde; border-bottom: 1px solid #eef3f5; padding: 3px; } +.paramkey { + text-align: right; +} +.paramtype { + white-space: nowrap; +} +.paramname { + /*color: #eef3f5;*/ + font-style: italic; + white-space: nowrap; +} + +.figure_caption { + width: 70%; + padding-top: 5px; + padding-left: 50px; + padding-right: 50px; +} + + + +/* End Styling for detailed member documentation */ + + +/* for the tree view: ftvtree is new in doxygen 1.5.4 */ +.ftvtree { margin:0.5em; } +.directory { font-weight: bold; } +.directory h3 { margin: 1em 0 0; } +.directory > h3 { margin-top: 0; } +.directory p { margin: 0px; 
white-space: nowrap; } +.directory div { display: none; margin: 0px; } +.directory img { vertical-align: -30%; } +ul li p { margin:0 } +ul li p + p { margin-top:0.8em } +hr { color: #a1a1a4; background-color: #a1a1a4; height: 1px; border-width: 0; } +DIV.directory h3 { position: relative; height: 0.3em; visibility: hidden; float: none; clear: none; } +.super { + font-size: .9em; + margin: 0 0 0 2px; + position: relative; + top: -2px; + vertical-align: top; + } +.memproto .memname { border-width: 0; } +.memproto .memtemplate { padding-left: 3px; } +.contents { font-size: 12px; line-height: 1.4em; } +.contents H2 A.anchor { color: #000; text-decoration: none; } +.contents A { color: #00467f; text-decoration: none; } +.contents A:hover { text-decoration: underline; } +.contents A:visited { color: #007698; } +.navigation { position: relative; width: 100%; float: left; } +/* +#content { + padding-left:20px; + width:550px; + }*/ + +/* Everything below this has been added by Nicolas */ + +/* Style for definition lists used in the definition part */ +dl.members { + border-spacing: 0px; + padding: 0px; + border-style:none; + /*background-color:#FAFAFA;*/ + font-size:11px; +} + +/* Each dt in a dl.members is split into two divs, top-left and top-right, to simulate a table*/ +div.top-left { + float:left; + width:113px; + text-align:right; + vertical-align:middle; + margin:4px; +} + +/*div.top-left p.template +{ + text-align:left; + width:100%; + margin: -14pt 0; + padding: 8pt 0; +}*/ + +p.template +{ + margin:0; + color:#606060; +} + +span.template +{ +/* font-weight: lighter; + font-size: smaller;*/ + margin:0; + color:#606060; + display: block; +} + +span.member_type, span.member_long_type +{ + text-align: right; + padding-right:10px; +} + +span.member_long_type +{ + overflow:visible; + white-space:nowrap; +} + +span.member_name, span.member_name_long_type +{ + text-align:left; + white-space:nowrap; +} + +dl.members { display:table; border-collapse:collapse; } +dl.members 
dt, +dl.members dd { display:table-row } +dl.members span.member_type, +dl.members span.member_long_type, +dl.members span.member_name_long_type { display:table-cell; width:auto; white-space:nowrap; padding:0; float:none } +dl.members dt { font-family:monospace } + +dl.members dd.shortdesc span { display:table-cell; color:#666; padding-bottom:0.6em; } + +/* Div contained in dl.members dd */ +div.bottom { + border:1px none #E0E0E0; + margin:4px; +} + +div.description { + border-style: solid none none; + border-width:1px; + padding-top:10px; + } + +/* The following elements are used in definition lists part of the description div */ +dt.description { + text-align:left; + background-color: #efeff0; + border-top: 1px solid #d8dcde; + border-bottom: 1px solid #eef3f5; + padding: 3px; +} + +dd.description div.section p +{ + margin-left:0px; +} + +dd.description { + + margin-left:20px; + margin-bottom: 20px; +} + +.keyword { + margin-left:0px; +} + +span.keyword +{ color:#000000; +} + +div.signature { + font-weight:bold; + +} + +div.signature .membername { + font-weight: bold; +} + +div.signature .param-name { + font-style: italic; +} + +div.parameterlist > dl > dt { + font-style: italic; +} + +div.relinfo +{ + left:-100px; +} + +div.description h2 +{ + margin-bottom:1em; +} + +.sectiontitle +{ + font-size:8pt; + margin-bottom:1pt; +} + +/* Display list as a table: http://www.maxdesign.com.au/presentation/definition/dl-table-display.htm +*/ +dl.table-display-params, dl.enumerator +{ + width: 470px; + /*margin: 5px 20pt 5px; + padding: 5px;*/ + overflow: hidden; +} + +.table-display-params dd, .enumerator dd +{ + left:0; + margin-left:50px; + margin-bottom: 5px; + margin-top: 2px; +} + +.classifier_name +{ + margin-left:0px; +} + +p.return +{ + margin-top:5px; +} + +tt.code +{ + border:none; +} + +p.apiDesc_subtitle +{ + margin-bottom:5px; +} + +#content>span, .classifier_name +{ +margin-left:40px; +} + +.doxy_graph +{ + margin-left:20px; + padding-left:20px; +} + 
+.enum-member-name +{ + display:block; + padding-left:10px; +} + +.enum-member-name-def +{ + display:block; + text-align:left; + background-color: #efeff0; + border-top: 1px solid #d8dcde; + border-bottom: 1px solid #eef3f5; + padding: 3px; +} + +p.return-value +{ + padding-left:20px; + margin-top:5px; + margin-bottom:5px; +} + diff --git a/doc/common/formatting/cuda-toolkit-documentation.png b/doc/common/formatting/cuda-toolkit-documentation.png new file mode 100644 index 0000000000000000000000000000000000000000..09a8e8a760c51c4ff40a0d512937da988a2408ac Binary files /dev/null and b/doc/common/formatting/cuda-toolkit-documentation.png differ diff --git a/doc/common/formatting/devtools-documentation.png b/doc/common/formatting/devtools-documentation.png new file mode 100644 index 0000000000000000000000000000000000000000..ddc28b09ab087fb4353ace1053e4c6b20f02b367 Binary files /dev/null and b/doc/common/formatting/devtools-documentation.png differ diff --git a/doc/common/formatting/devzone.png b/doc/common/formatting/devzone.png new file mode 100644 index 0000000000000000000000000000000000000000..122a0ee2c709b1d596040f683028b237a92a2563 Binary files /dev/null and b/doc/common/formatting/devzone.png differ diff --git a/doc/common/formatting/dita.style.css b/doc/common/formatting/dita.style.css new file mode 100644 index 0000000000000000000000000000000000000000..26ac2f4703a96136aa2b05850767d3e562e0d1c4 --- /dev/null +++ b/doc/common/formatting/dita.style.css @@ -0,0 +1,2536 @@ +/**/ + +@import url('prettify/prettify.css'); + +/*! 
normalize.css v2.1.3 | MIT License | git.io/normalize */ + +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +nav, +section, +summary +{ + display: block; +} + +audio, +canvas, +video +{ + display: inline-block; +} + +body +{ + font-family: Trebuchet, Helvetica, Arial, sans-serif; + +} + +p +{ + font-family: Trebuchet, Helvetica, Arial, sans-serif; + margin-top: 8pt; + margin-bottom: 8pt; +} + +h1 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 41px; + font-weight: 600; + line-height: 1.5; +} + +h1 .small +{ + font-weight: 400; + line-height: 1; +} + +h1 small +{ + font-weight: 400; + line-height: 1; +} + +h2 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 34px; + font-weight: 600; + line-height: 1.5; +} + +h2 .small +{ + font-weight: 400; + line-height: 1; +} + +h2 small +{ + font-weight: 400; + line-height: 1; +} + +h3 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 28px; + font-weight: 600; + line-height: 1.4; +} + +h3 .small +{ + font-weight: 400; + line-height: 1; +} + +h3 small +{ + font-weight: 400; + line-height: 1; +} + +h4 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 20px; + font-weight: 600; + line-height: 1.4; +} + +h4 .small +{ + font-weight: 400; + line-height: 1; +} + +h4 small +{ + font-weight: 400; + line-height: 1; +} + +h5 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 16px; + font-weight: 600; + line-height: 1.3; +} + +h5 .small +{ + font-weight: 400; + line-height: 1; +} + +h5 small +{ + font-weight: 400; + line-height: 1; +} + +h6 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 14px; + font-weight: 600; + line-height: 1.3; +} + +h6 .small +{ + font-weight: 400; + line-height: 1; +} + +h6 small +{ + font-weight: 400; + line-height: 1; +} + +.h1 +{ + color: 
#76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 41px; + font-weight: 600; + line-height: 1.5; +} + +.h1 .small +{ + font-weight: 400; + line-height: 1; +} + +.h1 small +{ + font-weight: 400; + line-height: 1; +} + +.h2 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 34px; + font-weight: 600; + line-height: 1.5; +} + +.h2 .small +{ + font-weight: 400; + line-height: 1; +} + +.h2 small +{ + font-weight: 400; + line-height: 1; +} + +.h3 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 28px; + font-weight: 600; + line-height: 1.4; +} + +.h3 .small +{ + font-weight: 400; + line-height: 1; +} + +.h3 small +{ + font-weight: 400; + line-height: 1; +} + +.h4 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 20px; + font-weight: 600; + line-height: 1.4; +} + +.h4 .small +{ + font-weight: 400; + line-height: 1; +} + +.h4 small +{ + font-weight: 400; + line-height: 1; +} + +.h5 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 16px; + font-weight: 600; + line-height: 1.3; +} + +.h5 .small +{ + font-weight: 400; + line-height: 1; +} + +.h5 small +{ + font-weight: 400; + line-height: 1; +} + +.h6 +{ + color: #76b900; + font-family: Trebuchet, Helvetica, Arial, sans-serif; + font-size: 14px; + font-weight: 600; + line-height: 1.3; +} + +.h6 .small +{ + font-weight: 400; + line-height: 1; +} + +.h6 small +{ + font-weight: 400; + line-height: 1; +} + +.lead +{ + margin-bottom: 24px; + font-size: 18px; + font-weight: 200; + line-height: 1.4; +} + +table +{ + border-left-style: none; + border-left-width: 1px; + border-left-color: #696969; + border-right-style: none; + border-right-width: 1px; + border-right-color: #696969; + border-top-style: none; + border-top-width: 1px; + border-top-color: #696969; + border-bottom-style: none; + border-bottom-width: 1px; + border-bottom-color: #696969; + 
padding-left: 0px; + padding-right: 0px; + padding-top: 0px; + padding-bottom: 0px; +} + +li +{ + margin: 0.8em; + margin-left: 0; + margin-right: 0; +} + +th +{ + vertical-align: middle; + border-left-style: solid; + border-left-width: 1px; + border-left-color: #696969; + border-right-style: solid; + border-right-width: 1px; + border-right-color: #696969; + border-top-style: solid; + border-top-width: 2px; + border-top-color: #696969; + border-bottom-style: solid; + border-bottom-width: 2px; + border-bottom-color: #696969; + background-color: #b0c4de; + margin-bottom: 12px; + margin-top: 12px; + margin-left: 3px; + margin-right: 3px; + padding-left: 2px; + padding-right: 2px; + padding-bottom: 8px; + padding-top: 8px; +} + +td +{ + border-left-style: solid; + border-left-width: 1px; + border-left-color: #696969; + border-right-style: solid; + border-right-width: 1px; + border-right-color: #696969; + border-top-style: solid; + border-top-width: 1px; + border-top-color: #696969; + border-bottom-style: solid; + border-bottom-width: 1px; + border-bottom-color: #696969; + margin-top: 8px; + margin-bottom: 8px; + margin-left: 3px; + margin-right: 3px; + padding-left: 2px; + padding-right: 2px; + padding-bottom: 6px; + padding-top: 6px; +} + +td.notebox +{ + background-color: #DDEEBF + } + +a +{ + background: transparent; + color: #76b900; + text-decoration: none; +} + +a.badge:focus +{ + color: #FFF; + cursor: pointer; + text-decoration: none; +} + +a.badge:hover +{ + color: #FFF; + cursor: pointer; + text-decoration: none; +} + +a.headerlink:hover +{ + background-color: #c60f0f; + color: #FFF; +} + +a.list-group-item +{ + color: #555; +} + +a.list-group-item .list-group-item-heading +{ + color: #333; +} + +a.list-group-item.active +{ + background-color: #76b900; + border-color: #76b900; + color: #FFF; + z-index: 2; +} + +a.list-group-item.active .list-group-item-heading +{ + color: inherit; +} + +a.list-group-item.active .list-group-item-text +{ + color: #d3ff86; +} + 
+a.list-group-item.active > .badge +{ + background-color: #FFF; + color: #76b900; +} + +a.list-group-item.active:focus +{ + background-color: #76b900; + border-color: #76b900; + color: #FFF; + z-index: 2; +} + +a.list-group-item.active:hover +{ + background-color: #76b900; + border-color: #76b900; + color: #FFF; + z-index: 2; +} + +a.list-group-item.active:focus .list-group-item-heading +{ + color: inherit; +} + +a.list-group-item.active:focus .list-group-item-text +{ + color: #d3ff86; +} + +a.list-group-item.active:hover .list-group-item-heading +{ + color: inherit; +} + +a.list-group-item.active:hover .list-group-item-text +{ + color: #d3ff86; +} + +a.list-group-item:focus +{ + background-color: #f5f5f5; + text-decoration: none; +} + +a.list-group-item:hover +{ + background-color: #f5f5f5; + text-decoration: none; +} + +a.thumbnail.active +{ + border-color: #76b900; +} + +a.thumbnail:focus +{ + border-color: #76b900; +} + +a.thumbnail:hover +{ + border-color: #76b900; +} + +a:visited +{ + text-decoration: underline; +} + +a:focus +{ + color: #76b900; + outline: 5px auto 0; + outline-offset: -2px; + text-decoration: underline; +} + +a:hover +{ + color: #76b900; + outline: 0; + text-decoration: underline; +} + +a:active +{ + outline: 0; +} + +.btn +{ + display: inline-block; + margin-bottom: 0; + font-weight: normal; + text-align: center; + vertical-align: middle; + cursor: pointer; + background-image: none; + border: 1px solid transparent; + white-space: nowrap; + padding: 6px 12px; + font-size: 16px; + line-height: 1.5; + border-radius: 0px; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + -o-user-select: none; + user-select: none; +} + +.btn:focus +{ + outline: thin dotted; + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} + +.btn:hover, +.btn:focus +{ + color: #333333; + text-decoration: none; +} + +.btn:active, +.btn.active +{ + outline: 0; + background-image: none; + -webkit-box-shadow: inset 0 3px 5px 
rgba(0, 0, 0, 0.125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); +} + +.btn.disabled, +.btn[disabled], +fieldset[disabled] .btn +{ + cursor: not-allowed; + pointer-events: none; + opacity: 0.65; + filter: alpha(opacity=65); + -webkit-box-shadow: none; + box-shadow: none; +} + +.btn-default +{ + color: #333333; + background-color: #ffffff; + border-color: #cccccc; +} + +.btn-default:hover, +.btn-default:focus, +.btn-default:active, +.btn-default.active, +.open .dropdown-toggle.btn-default +{ + color: #333333; + background-color: #ebebeb; + border-color: #adadad; +} + +.btn-default:active, +.btn-default.active, +.open .dropdown-toggle.btn-default +{ + background-image: none; +} + +.btn-default.disabled, +.btn-default[disabled], +fieldset[disabled] .btn-default, +.btn-default.disabled:hover, +.btn-default:hover[disabled], +fieldset[disabled] .btn-default:hover, +.btn-default.disabled:focus, +.btn-default:focus[disabled], +fieldset[disabled] .btn-default:focus, +.btn-default.disabled:active, +.btn-default:active[disabled], +fieldset[disabled] .btn-default:active, +.btn-default.disabled.active, +.btn-default.active[disabled], +fieldset[disabled] .btn-default.active +{ + background-color: #ffffff; + border-color: #cccccc; +} + +.btn-default .badge +{ + color: #ffffff; + background-color: #fff; +} + +.btn-primary +{ + color: #ffffff; + background-color: #76b900; + border-color: #76b900; +} + +.btn-primary:hover, +.btn-primary:focus, +.btn-primary:active, +.btn-primary.active, +.open .dropdown-toggle.btn-primary +{ + color: #ffffff; + background-color: #5c9000; + border-color: #4f7c00; +} + +.btn-primary:active, +.btn-primary.active, +.open .dropdown-toggle.btn-primary +{ + background-image: none; +} + +.btn-primary.disabled, +.btn-primary[disabled], +fieldset[disabled] .btn-primary, +.btn-primary.disabled:hover, +.btn-primary:hover[disabled], +fieldset[disabled] .btn-primary:hover, +.btn-primary.disabled:focus, +.btn-primary:focus[disabled], +fieldset[disabled] 
.btn-primary:focus, +.btn-primary.disabled:active, +.btn-primary:active[disabled], +fieldset[disabled] .btn-primary:active, +.btn-primary.disabled.active, +.btn-primary.active[disabled], +fieldset[disabled] .btn-primary.active +{ + background-color: #76b900; + border-color: #76b900; +} + +.btn-primary .badge +{ + color: #76b900; + background-color: #fff; +} + +.btn-warning +{ + color: #ffffff; + background-color: #f0ad4e; + border-color: #f0ad4e; +} + +.btn-warning:hover, +.btn-warning:focus, +.btn-warning:active, +.btn-warning.active, +.open .dropdown-toggle.btn-warning +{ + color: #ffffff; + background-color: #ed9c28; + border-color: #eb9316; +} + +.btn-warning:active, +.btn-warning.active, +.open .dropdown-toggle.btn-warning +{ + background-image: none; +} + +.btn-warning.disabled, +.btn-warning[disabled], +fieldset[disabled] .btn-warning, +.btn-warning.disabled:hover, +.btn-warning:hover[disabled], +fieldset[disabled] .btn-warning:hover, +.btn-warning.disabled:focus, +.btn-warning:focus[disabled], +fieldset[disabled] .btn-warning:focus, +.btn-warning.disabled:active, +.btn-warning:active[disabled], +fieldset[disabled] .btn-warning:active, +.btn-warning.disabled.active, +.btn-warning.active[disabled], +fieldset[disabled] .btn-warning.active +{ + background-color: #f0ad4e; + border-color: #f0ad4e; +} + +.btn-warning .badge +{ + color: #f0ad4e; + background-color: #fff; +} + +.btn-danger +{ + color: #ffffff; + background-color: #d9534f; + border-color: #d9534f; +} + +.btn-danger:hover, +.btn-danger:focus, +.btn-danger:active, +.btn-danger.active, +.open .dropdown-toggle.btn-danger +{ + color: #ffffff; + background-color: #d2322d; + border-color: #c12e2a; +} + +.btn-danger:active, +.btn-danger.active, +.open .dropdown-toggle.btn-danger +{ + background-image: none; +} + +.btn-danger.disabled, +.btn-danger[disabled], +fieldset[disabled] .btn-danger, +.btn-danger.disabled:hover, +.btn-danger:hover[disabled], +fieldset[disabled] .btn-danger:hover, 
+.btn-danger.disabled:focus, +.btn-danger:focus[disabled], +fieldset[disabled] .btn-danger:focus, +.btn-danger.disabled:active, +.btn-danger:active[disabled], +fieldset[disabled] .btn-danger:active, +.btn-danger.disabled.active, +.btn-danger.active[disabled], +fieldset[disabled] .btn-danger.active +{ + background-color: #d9534f; + border-color: #d9534f; +} + +.btn-danger .badge +{ + color: #d9534f; + background-color: #fff; +} + +.btn-success +{ + color: #ffffff; + background-color: #76b900; + border-color: #76b900; +} + +.btn-success:hover, +.btn-success:focus, +.btn-success:active, +.btn-success.active, +.open .dropdown-toggle.btn-success +{ + color: #ffffff; + background-color: #5c9000; + border-color: #4f7c00; +} + +.btn-success:active, +.btn-success.active, +.open .dropdown-toggle.btn-success +{ + background-image: none; +} + +.btn-success.disabled, +.btn-success[disabled], +fieldset[disabled] .btn-success, +.btn-success.disabled:hover, +.btn-success:hover[disabled], +fieldset[disabled] .btn-success:hover, +.btn-success.disabled:focus, +.btn-success:focus[disabled], +fieldset[disabled] .btn-success:focus, +.btn-success.disabled:active, +.btn-success:active[disabled], +fieldset[disabled] .btn-success:active, +.btn-success.disabled.active, +.btn-success.active[disabled], +fieldset[disabled] .btn-success.active +{ + background-color: #76b900; + border-color: #76b900; +} + +.btn-success .badge +{ + color: #76b900; + background-color: #fff; +} + +.btn-info +{ + color: #ffffff; + background-color: #5bc0de; + border-color: #5bc0de; +} + +.btn-info:hover, +.btn-info:focus, +.btn-info:active, +.btn-info.active, +.open .dropdown-toggle.btn-info +{ + color: #ffffff; + background-color: #39b3d7; + border-color: #2aabd2; +} + +.btn-info:active, +.btn-info.active, +.open .dropdown-toggle.btn-info +{ + background-image: none; +} + +.btn-info.disabled, +.btn-info[disabled], +fieldset[disabled] .btn-info, +.btn-info.disabled:hover, +.btn-info:hover[disabled], 
+fieldset[disabled] .btn-info:hover, +.btn-info.disabled:focus, +.btn-info:focus[disabled], +fieldset[disabled] .btn-info:focus, +.btn-info.disabled:active, +.btn-info:active[disabled], +fieldset[disabled] .btn-info:active, +.btn-info.disabled.active, +.btn-info.active[disabled], +fieldset[disabled] .btn-info.active +{ + background-color: #5bc0de; + border-color: #5bc0de; +} + +.btn-info .badge +{ + color: #5bc0de; + background-color: #fff; +} + +.btn-link +{ + color: #76b900; + font-weight: normal; + cursor: pointer; + border-radius: 0; +} + +.btn-link, +.btn-link:active, +.btn-link[disabled], +fieldset[disabled] .btn-link +{ + background-color: transparent; + -webkit-box-shadow: none; + box-shadow: none; +} + +.btn-link, +.btn-link:hover, +.btn-link:focus, +.btn-link:active +{ + border-color: transparent; +} + +.btn-link:hover, +.btn-link:focus +{ + color: #76b900; + text-decoration: underline; + background-color: transparent; +} + +.btn-link:hover[disabled], +fieldset[disabled] .btn-link:hover, +.btn-link:focus[disabled], +fieldset[disabled] .btn-link:focus +{ + color: #999999; + text-decoration: none; +} + +.btn-lg +{ + padding: 8px 16px; + font-size: 20px; + line-height: 1.33; + border-radius: 0px; +} + +.btn-sm +{ + padding: 5px 10px; + font-size: 14px; + line-height: 1.5; + border-radius: 0px; +} + +.btn-xs +{ + padding: 4px 6px; + font-size: 14px; + line-height: 1.5; + border-radius: 0px; +} + +.btn-block +{ + display: block; + width: 100%; + padding-left: 0; + padding-right: 0; +} + +.btn-block + .btn-block +{ + margin-top: 5px; +} + +input.btn-block[type="submit"], +input.btn-block[type="reset"], +input.btn-block[type="button"] +{ + width: 100%; +} + +.row +{ + margin-left: -10px; + margin-right: -10px; +} + +.row:before, +.row:after +{ + content: " "; + display: table; +} + +.row:after +{ + clear: both; +} + +.row:before, +.row:after +{ + content: " "; + display: table; +} + +.row:after +{ + clear: both; +} + +.col-xs-1, +.col-sm-1, +.col-md-1, 
+.col-lg-1, +.col-xs-2, +.col-sm-2, +.col-md-2, +.col-lg-2, +.col-xs-3, +.col-sm-3, +.col-md-3, +.col-lg-3, +.col-xs-4, +.col-sm-4, +.col-md-4, +.col-lg-4, +.col-xs-5, +.col-sm-5, +.col-md-5, +.col-lg-5, +.col-xs-6, +.col-sm-6, +.col-md-6, +.col-lg-6, +.col-xs-7, +.col-sm-7, +.col-md-7, +.col-lg-7, +.col-xs-8, +.col-sm-8, +.col-md-8, +.col-lg-8, +.col-xs-9, +.col-sm-9, +.col-md-9, +.col-lg-9, +.col-xs-10, +.col-sm-10, +.col-md-10, +.col-lg-10, +.col-xs-11, +.col-sm-11, +.col-md-11, +.col-lg-11, +.col-xs-12, +.col-sm-12, +.col-md-12, +.col-lg-12 +{ + position: relative; + min-height: 1px; + padding-left: 10px; + padding-right: 10px; +} + +.col-xs-1, +.col-xs-2, +.col-xs-3, +.col-xs-4, +.col-xs-5, +.col-xs-6, +.col-xs-7, +.col-xs-8, +.col-xs-9, +.col-xs-10, +.col-xs-11, +.col-xs-12 +{ + float: left; +} + +.col-xs-12 +{ + width: 100%; +} + +.col-xs-11 +{ + width: 91.66666666666666%; +} + +.col-xs-10 +{ + width: 83.33333333333334%; +} + +.col-xs-9 +{ + width: 75%; +} + +.col-xs-8 +{ + width: 66.66666666666666%; +} + +.col-xs-7 +{ + width: 58.333333333333336%; +} + +.col-xs-6 +{ + width: 50%; +} + +.col-xs-5 +{ + width: 41.66666666666667%; +} + +.col-xs-4 +{ + width: 33.33333333333333%; +} + +.col-xs-3 +{ + width: 25%; +} + +.col-xs-2 +{ + width: 16.666666666666664%; +} + +.col-xs-1 +{ + width: 8.333333333333332%; +} + +.col-xs-pull-12 +{ + right: 100%; +} + +.col-xs-pull-11 +{ + right: 91.66666666666666%; +} + +.col-xs-pull-10 +{ + right: 83.33333333333334%; +} + +.col-xs-pull-9 +{ + right: 75%; +} + +.col-xs-pull-8 +{ + right: 66.66666666666666%; +} + +.col-xs-pull-7 +{ + right: 58.333333333333336%; +} + +.col-xs-pull-6 +{ + right: 50%; +} + +.col-xs-pull-5 +{ + right: 41.66666666666667%; +} + +.col-xs-pull-4 +{ + right: 33.33333333333333%; +} + +.col-xs-pull-3 +{ + right: 25%; +} + +.col-xs-pull-2 +{ + right: 16.666666666666664%; +} + +.col-xs-pull-1 +{ + right: 8.333333333333332%; +} + +.col-xs-pull-0 +{ + right: 0%; +} + +.col-xs-push-12 +{ + left: 100%; +} + 
+.col-xs-push-11 +{ + left: 91.66666666666666%; +} + +.col-xs-push-10 +{ + left: 83.33333333333334%; +} + +.col-xs-push-9 +{ + left: 75%; +} + +.col-xs-push-8 +{ + left: 66.66666666666666%; +} + +.col-xs-push-7 +{ + left: 58.333333333333336%; +} + +.col-xs-push-6 +{ + left: 50%; +} + +.col-xs-push-5 +{ + left: 41.66666666666667%; +} + +.col-xs-push-4 +{ + left: 33.33333333333333%; +} + +.col-xs-push-3 +{ + left: 25%; +} + +.col-xs-push-2 +{ + left: 16.666666666666664%; +} + +.col-xs-push-1 +{ + left: 8.333333333333332%; +} + +.col-xs-push-0 +{ + left: 0%; +} + +.col-xs-offset-12 +{ + margin-left: 100%; +} + +.col-xs-offset-11 +{ + margin-left: 91.66666666666666%; +} + +.col-xs-offset-10 +{ + margin-left: 83.33333333333334%; +} + +.col-xs-offset-9 +{ + margin-left: 75%; +} + +.col-xs-offset-8 +{ + margin-left: 66.66666666666666%; +} + +.col-xs-offset-7 +{ + margin-left: 58.333333333333336%; +} + +.col-xs-offset-6 +{ + margin-left: 50%; +} + +.col-xs-offset-5 +{ + margin-left: 41.66666666666667%; +} + +.col-xs-offset-4 +{ + margin-left: 33.33333333333333%; +} + +.col-xs-offset-3 +{ + margin-left: 25%; +} + +.col-xs-offset-2 +{ + margin-left: 16.666666666666664%; +} + +.col-xs-offset-1 +{ + margin-left: 8.333333333333332%; +} + +.col-xs-offset-0 +{ + margin-left: 0%; +} + +.col-sm-1, +.col-sm-2, +.col-sm-3, +.col-sm-4, +.col-sm-5, +.col-sm-6, +.col-sm-7, +.col-sm-8, +.col-sm-9, +.col-sm-10, +.col-sm-11, +.col-sm-12 +{ + float: left; +} + +.col-sm-12 +{ + width: 100%; +} + +.col-sm-11 +{ + width: 91.66666666666666%; +} + +.col-sm-10 +{ + width: 83.33333333333334%; +} + +.col-sm-9 +{ + width: 75%; +} + +.col-sm-8 +{ + width: 66.66666666666666%; +} + +.col-sm-7 +{ + width: 58.333333333333336%; +} + +.col-sm-6 +{ + width: 50%; +} + +.col-sm-5 +{ + width: 41.66666666666667%; +} + +.col-sm-4 +{ + width: 33.33333333333333%; +} + +.col-sm-3 +{ + width: 25%; +} + +.col-sm-2 +{ + width: 16.666666666666664%; +} + +.col-sm-1 +{ + width: 8.333333333333332%; +} + +.col-sm-pull-12 
+{ + right: 100%; +} + +.col-sm-pull-11 +{ + right: 91.66666666666666%; +} + +.col-sm-pull-10 +{ + right: 83.33333333333334%; +} + +.col-sm-pull-9 +{ + right: 75%; +} + +.col-sm-pull-8 +{ + right: 66.66666666666666%; +} + +.col-sm-pull-7 +{ + right: 58.333333333333336%; +} + +.col-sm-pull-6 +{ + right: 50%; +} + +.col-sm-pull-5 +{ + right: 41.66666666666667%; +} + +.col-sm-pull-4 +{ + right: 33.33333333333333%; +} + +.col-sm-pull-3 +{ + right: 25%; +} + +.col-sm-pull-2 +{ + right: 16.666666666666664%; +} + +.col-sm-pull-1 +{ + right: 8.333333333333332%; +} + +.col-sm-pull-0 +{ + right: 0%; +} + +.col-sm-push-12 +{ + left: 100%; +} + +.col-sm-push-11 +{ + left: 91.66666666666666%; +} + +.col-sm-push-10 +{ + left: 83.33333333333334%; +} + +.col-sm-push-9 +{ + left: 75%; +} + +.col-sm-push-8 +{ + left: 66.66666666666666%; +} + +.col-sm-push-7 +{ + left: 58.333333333333336%; +} + +.col-sm-push-6 +{ + left: 50%; +} + +.col-sm-push-5 +{ + left: 41.66666666666667%; +} + +.col-sm-push-4 +{ + left: 33.33333333333333%; +} + +.col-sm-push-3 +{ + left: 25%; +} + +.col-sm-push-2 +{ + left: 16.666666666666664%; +} + +.col-sm-push-1 +{ + left: 8.333333333333332%; +} + +.col-sm-push-0 +{ + left: 0%; +} + +.col-sm-offset-12 +{ + margin-left: 100%; +} + +.col-sm-offset-11 +{ + margin-left: 91.66666666666666%; +} + +.col-sm-offset-10 +{ + margin-left: 83.33333333333334%; +} + +.col-sm-offset-9 +{ + margin-left: 75%; +} + +.col-sm-offset-8 +{ + margin-left: 66.66666666666666%; +} + +.col-sm-offset-7 +{ + margin-left: 58.333333333333336%; +} + +.col-sm-offset-6 +{ + margin-left: 50%; +} + +.col-sm-offset-5 +{ + margin-left: 41.66666666666667%; +} + +.col-sm-offset-4 +{ + margin-left: 33.33333333333333%; +} + +.col-sm-offset-3 +{ + margin-left: 25%; +} + +.col-sm-offset-2 +{ + margin-left: 16.666666666666664%; +} + +.col-sm-offset-1 +{ + margin-left: 8.333333333333332%; +} + +.col-sm-offset-0 +{ + margin-left: 0%; +} + +.col-md-1, +.col-md-2, +.col-md-3, +.col-md-4, +.col-md-5, 
+.col-md-6, +.col-md-7, +.col-md-8, +.col-md-9, +.col-md-10, +.col-md-11, +.col-md-12 +{ + float: left; +} + +.col-md-12 +{ + width: 100%; +} + +.col-md-11 +{ + width: 91.66666666666666%; +} + +.col-md-10 +{ + width: 83.33333333333334%; +} + +.col-md-9 +{ + width: 75%; +} + +.col-md-8 +{ + width: 66.66666666666666%; +} + +.col-md-7 +{ + width: 58.333333333333336%; +} + +.col-md-6 +{ + width: 50%; +} + +.col-md-5 +{ + width: 41.66666666666667%; +} + +.col-md-4 +{ + width: 33.33333333333333%; +} + +.col-md-3 +{ + width: 25%; +} + +.col-md-2 +{ + width: 16.666666666666664%; +} + +.col-md-1 +{ + width: 8.333333333333332%; +} + +.col-md-pull-12 +{ + right: 100%; +} + +.col-md-pull-11 +{ + right: 91.66666666666666%; +} + +.col-md-pull-10 +{ + right: 83.33333333333334%; +} + +.col-md-pull-9 +{ + right: 75%; +} + +.col-md-pull-8 +{ + right: 66.66666666666666%; +} + +.col-md-pull-7 +{ + right: 58.333333333333336%; +} + +.col-md-pull-6 +{ + right: 50%; +} + +.col-md-pull-5 +{ + right: 41.66666666666667%; +} + +.col-md-pull-4 +{ + right: 33.33333333333333%; +} + +.col-md-pull-3 +{ + right: 25%; +} + +.col-md-pull-2 +{ + right: 16.666666666666664%; +} + +.col-md-pull-1 +{ + right: 8.333333333333332%; +} + +.col-md-pull-0 +{ + right: 0%; +} + +.col-md-push-12 +{ + left: 100%; +} + +.col-md-push-11 +{ + left: 91.66666666666666%; +} + +.col-md-push-10 +{ + left: 83.33333333333334%; +} + +.col-md-push-9 +{ + left: 75%; +} + +.col-md-push-8 +{ + left: 66.66666666666666%; +} + +.col-md-push-7 +{ + left: 58.333333333333336%; +} + +.col-md-push-6 +{ + left: 50%; +} + +.col-md-push-5 +{ + left: 41.66666666666667%; +} + +.col-md-push-4 +{ + left: 33.33333333333333%; +} + +.col-md-push-3 +{ + left: 25%; +} + +.col-md-push-2 +{ + left: 16.666666666666664%; +} + +.col-md-push-1 +{ + left: 8.333333333333332%; +} + +.col-md-push-0 +{ + left: 0%; +} + +.col-md-offset-12 +{ + margin-left: 100%; +} + +.col-md-offset-11 +{ + margin-left: 91.66666666666666%; +} + +.col-md-offset-10 +{ + 
margin-left: 83.33333333333334%; +} + +.col-md-offset-9 +{ + margin-left: 75%; +} + +.col-md-offset-8 +{ + margin-left: 66.66666666666666%; +} + +.col-md-offset-7 +{ + margin-left: 58.333333333333336%; +} + +.col-md-offset-6 +{ + margin-left: 50%; +} + +.col-md-offset-5 +{ + margin-left: 41.66666666666667%; +} + +.col-md-offset-4 +{ + margin-left: 33.33333333333333%; +} + +.col-md-offset-3 +{ + margin-left: 25%; +} + +.col-md-offset-2 +{ + margin-left: 16.666666666666664%; +} + +.col-md-offset-1 +{ + margin-left: 8.333333333333332%; +} + +.col-md-offset-0 +{ + margin-left: 0%; +} + +.col-lg-1, +.col-lg-2, +.col-lg-3, +.col-lg-4, +.col-lg-5, +.col-lg-6, +.col-lg-7, +.col-lg-8, +.col-lg-9, +.col-lg-10, +.col-lg-11, +.col-lg-12 +{ + float: left; +} + +.col-lg-12 +{ + width: 100%; +} + +.col-lg-11 +{ + width: 91.66666666666666%; +} + +.col-lg-10 +{ + width: 83.33333333333334%; +} + +.col-lg-9 +{ + width: 75%; +} + +.col-lg-8 +{ + width: 66.66666666666666%; +} + +.col-lg-7 +{ + width: 58.333333333333336%; +} + +.col-lg-6 +{ + width: 50%; +} + +.col-lg-5 +{ + width: 41.66666666666667%; +} + +.col-lg-4 +{ + width: 33.33333333333333%; +} + +.col-lg-3 +{ + width: 25%; +} + +.col-lg-2 +{ + width: 16.666666666666664%; +} + +.col-lg-1 +{ + width: 8.333333333333332%; +} + +.col-lg-pull-12 +{ + right: 100%; +} + +.col-lg-pull-11 +{ + right: 91.66666666666666%; +} + +.col-lg-pull-10 +{ + right: 83.33333333333334%; +} + +.col-lg-pull-9 +{ + right: 75%; +} + +.col-lg-pull-8 +{ + right: 66.66666666666666%; +} + +.col-lg-pull-7 +{ + right: 58.333333333333336%; +} + +.col-lg-pull-6 +{ + right: 50%; +} + +.col-lg-pull-5 +{ + right: 41.66666666666667%; +} + +.col-lg-pull-4 +{ + right: 33.33333333333333%; +} + +.col-lg-pull-3 +{ + right: 25%; +} + +.col-lg-pull-2 +{ + right: 16.666666666666664%; +} + +.col-lg-pull-1 +{ + right: 8.333333333333332%; +} + +.col-lg-pull-0 +{ + right: 0%; +} + +.col-lg-push-12 +{ + left: 100%; +} + +.col-lg-push-11 +{ + left: 91.66666666666666%; +} + 
+.col-lg-push-10 +{ + left: 83.33333333333334%; +} + +.col-lg-push-9 +{ + left: 75%; +} + +.col-lg-push-8 +{ + left: 66.66666666666666%; +} + +.col-lg-push-7 +{ + left: 58.333333333333336%; +} + +.col-lg-push-6 +{ + left: 50%; +} + +.col-lg-push-5 +{ + left: 41.66666666666667%; +} + +.col-lg-push-4 +{ + left: 33.33333333333333%; +} + +.col-lg-push-3 +{ + left: 25%; +} + +.col-lg-push-2 +{ + left: 16.666666666666664%; +} + +.col-lg-push-1 +{ + left: 8.333333333333332%; +} + +.col-lg-push-0 +{ + left: 0%; +} + +.col-lg-offset-12 +{ + margin-left: 100%; +} + +.col-lg-offset-11 +{ + margin-left: 91.66666666666666%; +} + +.col-lg-offset-10 +{ + margin-left: 83.33333333333334%; +} + +.col-lg-offset-9 +{ + margin-left: 75%; +} + +.col-lg-offset-8 +{ + margin-left: 66.66666666666666%; +} + +.col-lg-offset-7 +{ + margin-left: 58.333333333333336%; +} + +.col-lg-offset-6 +{ + margin-left: 50%; +} + +.col-lg-offset-5 +{ + margin-left: 41.66666666666667%; +} + +.col-lg-offset-4 +{ + margin-left: 33.33333333333333%; +} + +.col-lg-offset-3 +{ + margin-left: 25%; +} + +.col-lg-offset-2 +{ + margin-left: 16.666666666666664%; +} + +.col-lg-offset-1 +{ + margin-left: 8.333333333333332%; +} + +.col-lg-offset-0 +{ + margin-left: 0%; +} + +.panel +{ + margin-bottom: 24px; + background-color: #ffffff; + border: 1px solid transparent; + border-radius: 0px; + -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); + box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); +} + +.panel-body +{ + padding: 15px; +} + +.panel-body:before, +.panel-body:after +{ + content: " "; + display: table; +} + +.panel-body:after +{ + clear: both; +} + +.panel-body:before, +.panel-body:after +{ + content: " "; + display: table; +} + +.panel-body:after +{ + clear: both; +} + +.panel > .list-group +{ + margin-bottom: 0; +} + +.panel > .list-group .list-group-item +{ + border-width: 1px 0; +} + +.panel > .list-group .list-group-item:first-child +{ + border-top-right-radius: 0; + border-top-left-radius: 0; +} + +.panel > 
.list-group .list-group-item:last-child +{ + border-bottom: 0; +} + +.panel-heading + .list-group .list-group-item:first-child +{ + border-top-width: 0; +} + +.panel > .table, +.panel > .table-responsive > .table +{ + margin-bottom: 0; +} + +.panel > .panel-body + .table, +.panel > .panel-body + .table-responsive +{ + border-top: 1px solid #dddddd; +} + +.panel > .table > tbody:first-child th, +.panel > .table > tbody:first-child td +{ + border-top: 0; +} + +.panel > .table-bordered, +.panel > .table-responsive > .table-bordered +{ + border: 0; +} + +.panel > .table-bordered > thead > tr > th:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:first-child, +.panel > .table-bordered > tbody > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:first-child, +.panel > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-bordered > thead > tr > td:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:first-child, +.panel > .table-bordered > tbody > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:first-child, +.panel > .table-bordered > tfoot > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:first-child +{ + border-left: 0; +} + +.panel > .table-bordered > thead > tr > th:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:last-child, +.panel > .table-bordered > tbody > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:last-child, +.panel > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-bordered > thead > tr > td:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:last-child, +.panel > .table-bordered > tbody > tr > td:last-child, +.panel > .table-responsive > 
.table-bordered > tbody > tr > td:last-child, +.panel > .table-bordered > tfoot > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:last-child +{ + border-right: 0; +} + +.panel > .table-bordered > thead > tr:last-child > th, +.panel > .table-responsive > .table-bordered > thead > tr:last-child > th, +.panel > .table-bordered > tbody > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > th, +.panel > .table-bordered > tfoot > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > th, +.panel > .table-bordered > thead > tr:last-child > td, +.panel > .table-responsive > .table-bordered > thead > tr:last-child > td, +.panel > .table-bordered > tbody > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > td, +.panel > .table-bordered > tfoot > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > td +{ + border-bottom: 0; +} + +.panel > .table-responsive +{ + border: 0; + margin-bottom: 0; +} + +.panel-heading +{ + padding: 10px 15px; + border-bottom: 1px solid transparent; + border-top-right-radius: -1px; + border-top-left-radius: -1px; +} + +.panel-heading > .dropdown .dropdown-toggle +{ + color: inherit; +} + +.panel-title +{ + margin-top: 0; + margin-bottom: 0; + font-size: 18px; + color: inherit; +} + +.panel-title > a +{ + color: inherit; +} + +.panel-footer +{ + padding: 10px 15px; + background-color: #f5f5f5; + border-top: 1px solid #dddddd; + border-bottom-right-radius: -1px; + border-bottom-left-radius: -1px; +} + +.panel-group .panel +{ + margin-bottom: 0; + border-radius: 0px; + overflow: hidden; +} + +.panel-group .panel + .panel +{ + margin-top: 5px; +} + +.panel-group .panel-heading +{ + border-bottom: 0; +} + +.panel-group .panel-heading + .panel-collapse .panel-body +{ + border-top: 1px solid #dddddd; +} + +.panel-group .panel-footer +{ + border-top: 0; +} + 
+.panel-group .panel-footer + .panel-collapse .panel-body +{ + border-bottom: 1px solid #dddddd; +} + +.panel-default +{ + border-color: #dddddd; +} + +.panel-default > .panel-heading +{ + color: #595959; + background-color: #f5f5f5; + border-color: #dddddd; +} + +.panel-default > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #dddddd; +} + +.panel-default > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #dddddd; +} + +.panel-primary +{ + border-color: #76b900; +} + +.panel-primary > .panel-heading +{ + color: #ffffff; + background-color: #76b900; + border-color: #76b900; +} + +.panel-primary > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #76b900; +} + +.panel-primary > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #76b900; +} + +.panel-success +{ + border-color: #d6e9c6; +} + +.panel-success > .panel-heading +{ + color: #3c763d; + background-color: #dff0d8; + border-color: #d6e9c6; +} + +.panel-success > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #d6e9c6; +} + +.panel-success > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #d6e9c6; +} + +.panel-warning +{ + border-color: #faebcc; +} + +.panel-warning > .panel-heading +{ + color: #8a6d3b; + background-color: #fcf8e3; + border-color: #faebcc; +} + +.panel-warning > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #faebcc; +} + +.panel-warning > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #faebcc; +} + +.panel-danger +{ + border-color: #ebccd1; +} + +.panel-danger > .panel-heading +{ + color: #a94442; + background-color: #f2dede; + border-color: #ebccd1; +} + +.panel-danger > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #ebccd1; +} + +.panel-danger > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #ebccd1; +} + +.panel-info +{ + border-color: #bce8f1; +} + +.panel-info > .panel-heading +{ + 
color: #31708f; + background-color: #d9edf7; + border-color: #bce8f1; +} + +.panel-info > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #bce8f1; +} + +.panel-info > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #bce8f1; +} + +.content-sidebar .panel-heading +{ + text-transform: uppercase; + font-weight: bold; +} + +.panel-success +{ + border-color: #76b900; +} + +.panel-success > .panel-heading +{ + color: #ffffff; + background-color: #76b900; + border-color: #76b900; +} + +.panel-success > .panel-heading + .panel-collapse .panel-body +{ + border-top-color: #76b900; +} + +.panel-success > .panel-footer + .panel-collapse .panel-body +{ + border-bottom-color: #76b900; +} + +.panel-success .panel-heading a +{ + color: white; +} + +*, +*:before, +*:after +{ + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} + diff --git a/doc/common/formatting/html5shiv-printshiv.min.js b/doc/common/formatting/html5shiv-printshiv.min.js new file mode 100644 index 0000000000000000000000000000000000000000..7a6bf0107cebdbb506b8f5cdae4d64c836ae178c --- /dev/null +++ b/doc/common/formatting/html5shiv-printshiv.min.js @@ -0,0 +1,11 @@ +/* + HTML5 Shiv v3.6.2 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed +*/ +(function(j,f){function s(a,b){var c=a.createElement("p"),m=a.getElementsByTagName("head")[0]||a.documentElement;c.innerHTML="x";return m.insertBefore(c.lastChild,m.firstChild)}function o(){var a=d.elements;return"string"==typeof a?a.split(" "):a}function n(a){var b=t[a[u]];b||(b={},p++,a[u]=p,t[p]=b);return b}function v(a,b,c){b||(b=f);if(e)return b.createElement(a);c||(c=n(b));b=c.cache[a]?c.cache[a].cloneNode():y.test(a)?(c.cache[a]=c.createElem(a)).cloneNode():c.createElem(a); +return b.canHaveChildren&&!z.test(a)?c.frag.appendChild(b):b}function 
A(a,b){if(!b.cache)b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag();a.createElement=function(c){return!d.shivMethods?b.createElem(c):v(c,a,b)};a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+o().join().replace(/\w+/g,function(a){b.createElem(a);b.frag.createElement(a);return'c("'+a+'")'})+");return n}")(d,b.frag)} +function w(a){a||(a=f);var b=n(a);if(d.shivCSS&&!q&&!b.hasCSS)b.hasCSS=!!s(a,"article,aside,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}");e||A(a,b);return a}function B(a){for(var b,c=a.attributes,m=c.length,f=a.ownerDocument.createElement(l+":"+a.nodeName);m--;)b=c[m],b.specified&&f.setAttribute(b.nodeName,b.nodeValue);f.style.cssText=a.style.cssText;return f}function x(a){function b(){clearTimeout(d._removeSheetTimer);c&&c.removeNode(!0); +c=null}var c,f,d=n(a),e=a.namespaces,j=a.parentWindow;if(!C||a.printShived)return a;"undefined"==typeof e[l]&&e.add(l);j.attachEvent("onbeforeprint",function(){b();var g,i,d;d=a.styleSheets;for(var e=[],h=d.length,k=Array(h);h--;)k[h]=d[h];for(;d=k.pop();)if(!d.disabled&&D.test(d.media)){try{g=d.imports,i=g.length}catch(j){i=0}for(h=0;h+~])("+o().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"); +for(k="$1"+l+"\\:$2";i--;)e=g[i]=g[i].split("}"),e[e.length-1]=e[e.length-1].replace(h,k),g[i]=e.join("}");e=g.join("{");i=a.getElementsByTagName("*");h=i.length;k=RegExp("^(?:"+o().join("|")+")$","i");for(d=[];h--;)g=i[h],k.test(g.nodeName)&&d.push(g.applyElement(B(g)));f=d;c=s(a,e)});j.attachEvent("onafterprint",function(){for(var a=f,c=a.length;c--;)a[c].removeNode();clearTimeout(d._removeSheetTimer);d._removeSheetTimer=setTimeout(b,500)});a.printShived=!0;return a}var r=j.html5||{},z=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i, 
+y=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q,u="_html5shiv",p=0,t={},e;(function(){try{var a=f.createElement("a");a.innerHTML="";q="hidden"in a;var b;if(!(b=1==a.childNodes.length)){f.createElement("a");var c=f.createDocumentFragment();b="undefined"==typeof c.cloneNode||"undefined"==typeof c.createDocumentFragment||"undefined"==typeof c.createElement}e=b}catch(d){e=q=!0}})();var d={elements:r.elements||"abbr article aside audio bdi canvas data datalist details figcaption figure footer header hgroup main mark meter nav output progress section summary time video", +version:"3.6.2",shivCSS:!1!==r.shivCSS,supportsUnknownElements:e,shivMethods:!1!==r.shivMethods,type:"default",shivDocument:w,createElement:v,createDocumentFragment:function(a,b){a||(a=f);if(e)return a.createDocumentFragment();for(var b=b||n(a),c=b.frag.cloneNode(),d=0,j=o(),l=j.length;d7);function a(j){j=j||location.href;return"#"+j.replace(/^[^#]*#?(.*)$/,"$1")}$.fn[c]=function(j){return j?this.bind(c,j):this.trigger(c)};$.fn[c].delay=50;g[c]=$.extend(g[c],{setup:function(){if(d){return false}$(f.start)},teardown:function(){if(d){return false}$(f.stop)}});f=(function(){var j={},p,m=a(),k=function(q){return q},l=k,o=k;j.start=function(){p||n()};j.stop=function(){p&&clearTimeout(p);p=b};function n(){var r=a(),q=o(m);if(r!==m){l(m=r,q);$(e).trigger(c)}else{if(q!==m){location.href=location.href.replace(/#.*/,"")+q}}p=setTimeout(n,$.fn[c].delay)}!d&&(function(){var q,r;j.start=function(){if(!q){r=$.fn[c].src;r=r&&r+a();q=$('