| /* | |
| * Copyright 2011-2020 NVIDIA Corporation. All rights reserved. | |
| * | |
| * NOTICE TO LICENSEE: | |
| * | |
| * This source code and/or documentation ("Licensed Deliverables") are | |
| * subject to NVIDIA intellectual property rights under U.S. and | |
| * international Copyright laws. | |
| * | |
| * These Licensed Deliverables contained herein is PROPRIETARY and | |
| * CONFIDENTIAL to NVIDIA and is being provided under the terms and | |
| * conditions of a form of NVIDIA software license agreement by and | |
| * between NVIDIA and Licensee ("License Agreement") or electronically | |
| * accepted by Licensee. Notwithstanding any terms or conditions to | |
| * the contrary in the License Agreement, reproduction or disclosure | |
| * of the Licensed Deliverables to any third party without the express | |
| * written consent of NVIDIA is prohibited. | |
| * | |
| * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE | |
| * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE | |
| * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS | |
| * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. | |
| * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED | |
| * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, | |
| * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. | |
| * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE | |
| * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY | |
| * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY | |
| * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
| * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
| * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
| * OF THESE LICENSED DELIVERABLES. | |
| * | |
| * U.S. Government End Users. These Licensed Deliverables are a | |
| * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT | |
| * 1995), consisting of "commercial computer software" and "commercial | |
| * computer software documentation" as such terms are used in 48 | |
| * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government | |
| * only as a commercial end item. Consistent with 48 C.F.R.12.212 and | |
| * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all | |
| * U.S. Government End Users acquire the Licensed Deliverables with | |
| * only those rights set forth herein. | |
| * | |
| * Any use of the Licensed Deliverables in individual and commercial | |
| * software must include, in the user documentation and internal | |
| * comments to the code, the above Disclaimer and U.S. Government End | |
| * Users Notice. | |
| */ | |
| extern "C" { | |
| /** | |
| * \defgroup CUPTI_METRIC_API CUPTI Metric API | |
| * Functions, types, and enums that implement the CUPTI Metric API. | |
| * | |
| * \note The CUPTI metric API in the header cupti_metrics.h is not supported on | |
| * devices with compute capability 7.5 and higher (i.e. Turing and later GPU | |
| * architectures). This API will be deprecated in a future CUDA release. It is | |
| * replaced by the Profiling API in the header cupti_profiler_target.h and the | |
| * Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h, which | |
| * are supported on devices with compute capability 7.0 and higher (i.e. Volta | |
| * and later GPU architectures). | |
| * | |
| * @{ | |
| */ | |
| /** | |
| * \brief ID for a metric. | |
| * | |
| * A metric provides a measure of some aspect of the device. Metric IDs | |
| * are returned by \ref cuptiEnumMetrics, \ref cuptiDeviceEnumMetrics and | |
| * \ref cuptiMetricGetIdFromName, and are passed to the other metric | |
| * functions to select the metric to operate on. | |
| */ | |
| typedef uint32_t CUpti_MetricID; | |
| /** | |
| * \brief A metric category. | |
| * | |
| * Each metric is assigned to a category that represents the general | |
| * type of the metric. A metric's category is accessed using \ref | |
| * cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_CATEGORY | |
| * attribute. | |
| * | |
| * CUPTI_METRIC_CATEGORY_FORCE_INT is not a category. | |
| * NOTE(review): it presumably exists only to force the enum to occupy a | |
| * full int -- confirm. | |
| */ | |
| typedef enum { | |
| /** | |
| * A memory related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_MEMORY = 0, | |
| /** | |
| * An instruction related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_INSTRUCTION = 1, | |
| /** | |
| * A multiprocessor related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_MULTIPROCESSOR = 2, | |
| /** | |
| * A cache related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_CACHE = 3, | |
| /** | |
| * A texture related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_TEXTURE = 4, | |
| /** | |
| * An NVLink related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_NVLINK = 5, | |
| /** | |
| * A PCIe related metric. | |
| */ | |
| CUPTI_METRIC_CATEGORY_PCIE = 6, | |
| CUPTI_METRIC_CATEGORY_FORCE_INT = 0x7fffffff, | |
| } CUpti_MetricCategory; | |
| /** | |
| * \brief A metric evaluation mode. | |
| * | |
| * A metric can be evaluated per hardware instance to know the load balancing | |
| * across instances of a domain or the metric can be evaluated in aggregate mode | |
| * when the events involved in metric evaluation are from different event | |
| * domains. It might be possible to evaluate some metrics in both | |
| * modes for convenience, so the enumerators are bit flags that may both be | |
| * set. A metric's evaluation mode is accessed using \ref | |
| * cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_EVALUATION_MODE | |
| * attribute. | |
| * | |
| * CUPTI_METRIC_EVALUATION_MODE_FORCE_INT is not an evaluation mode. | |
| * NOTE(review): it presumably exists only to force the enum to occupy a | |
| * full int -- confirm. | |
| */ | |
| typedef enum { | |
| /** | |
| * If this bit is set, the metric can be profiled for each instance of the | |
| * domain. The event values passed to \ref cuptiMetricGetValue can contain | |
| * values for one instance of the domain, and \ref cuptiMetricGetValue can | |
| * be called for each instance. | |
| */ | |
| CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE = 1, | |
| /** | |
| * If this bit is set, the metric can be profiled over all instances. The | |
| * event values passed to \ref cuptiMetricGetValue can be aggregated values | |
| * of events for all instances of the domain. | |
| */ | |
| CUPTI_METRIC_EVALUATION_MODE_AGGREGATE = 1 << 1, | |
| CUPTI_METRIC_EVALUATION_MODE_FORCE_INT = 0x7fffffff, | |
| } CUpti_MetricEvaluationMode; | |
| /** | |
| * \brief Kinds of metric values. | |
| * | |
| * Metric values can be one of several different kinds. Corresponding | |
| * to each kind is a member of the CUpti_MetricValue union. The metric | |
| * value returned by \ref cuptiMetricGetValue should be accessed using | |
| * the appropriate member of that union based on its value kind. A | |
| * metric's value kind is accessed using \ref cuptiMetricGetAttribute | |
| * and the CUPTI_METRIC_ATTR_VALUE_KIND attribute. | |
| */ | |
| typedef enum { | |
| /** | |
| * The metric value is a 64-bit double | |
| * (CUpti_MetricValue::metricValueDouble). | |
| */ | |
| CUPTI_METRIC_VALUE_KIND_DOUBLE = 0, | |
| /** | |
| * The metric value is a 64-bit unsigned integer | |
| * (CUpti_MetricValue::metricValueUint64). | |
| */ | |
| CUPTI_METRIC_VALUE_KIND_UINT64 = 1, | |
| /** | |
| * The metric value is a percentage represented by a 64-bit | |
| * double (CUpti_MetricValue::metricValuePercent). For example, | |
| * 57.5% is represented by the value 57.5. | |
| */ | |
| CUPTI_METRIC_VALUE_KIND_PERCENT = 2, | |
| /** | |
| * The metric value is a throughput represented by a 64-bit | |
| * unsigned integer (CUpti_MetricValue::metricValueThroughput). | |
| * The unit for throughput values is bytes/second. | |
| */ | |
| CUPTI_METRIC_VALUE_KIND_THROUGHPUT = 3, | |
| /** | |
| * The metric value is a 64-bit signed integer | |
| * (CUpti_MetricValue::metricValueInt64). | |
| */ | |
| CUPTI_METRIC_VALUE_KIND_INT64 = 4, | |
| /** | |
| * The metric value is a utilization level, as represented by | |
| * CUpti_MetricValueUtilizationLevel | |
| * (CUpti_MetricValue::metricValueUtilizationLevel). | |
| */ | |
| CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL = 5, | |
| CUPTI_METRIC_VALUE_KIND_FORCE_INT = 0x7fffffff | |
| } CUpti_MetricValueKind; | |
| /** | |
| * \brief Utilization levels for metric values. | |
| * | |
| * Enumeration of utilization levels for metric values of kind | |
| * CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL. Utilization values can | |
| * vary from IDLE (0) to MAX (10) but the enumeration only provides | |
| * specific names for a few values, so a value in that range need not | |
| * match one of the named enumerators. | |
| */ | |
| typedef enum { | |
| CUPTI_METRIC_VALUE_UTILIZATION_IDLE = 0, | |
| CUPTI_METRIC_VALUE_UTILIZATION_LOW = 2, | |
| CUPTI_METRIC_VALUE_UTILIZATION_MID = 5, | |
| CUPTI_METRIC_VALUE_UTILIZATION_HIGH = 8, | |
| CUPTI_METRIC_VALUE_UTILIZATION_MAX = 10, | |
| CUPTI_METRIC_VALUE_UTILIZATION_FORCE_INT = 0x7fffffff | |
| } CUpti_MetricValueUtilizationLevel; | |
| /** | |
| * \brief Metric attributes. | |
| * | |
| * Metric attributes describe properties of a metric. These attributes | |
| * can be read using \ref cuptiMetricGetAttribute. | |
| * | |
| * For the c-string attributes, \ref cuptiMetricGetAttribute returns | |
| * only as many characters as fit in the caller's buffer; if the string | |
| * is longer than the buffer, the result has no terminating null byte. | |
| */ | |
| typedef enum { | |
| /** | |
| * Metric name. Value is a null terminated const c-string. | |
| */ | |
| CUPTI_METRIC_ATTR_NAME = 0, | |
| /** | |
| * Short description of metric. Value is a null terminated const c-string. | |
| */ | |
| CUPTI_METRIC_ATTR_SHORT_DESCRIPTION = 1, | |
| /** | |
| * Long description of metric. Value is a null terminated const c-string. | |
| */ | |
| CUPTI_METRIC_ATTR_LONG_DESCRIPTION = 2, | |
| /** | |
| * Category of the metric. Value is of type CUpti_MetricCategory. | |
| */ | |
| CUPTI_METRIC_ATTR_CATEGORY = 3, | |
| /** | |
| * Value type of the metric. Value is of type CUpti_MetricValueKind. | |
| */ | |
| CUPTI_METRIC_ATTR_VALUE_KIND = 4, | |
| /** | |
| * Metric evaluation mode. Value is of type CUpti_MetricEvaluationMode. | |
| */ | |
| CUPTI_METRIC_ATTR_EVALUATION_MODE = 5, | |
| CUPTI_METRIC_ATTR_FORCE_INT = 0x7fffffff, | |
| } CUpti_MetricAttribute; | |
| /** | |
| * \brief A metric value. | |
| * | |
| * Metric values can be one of several different kinds. Corresponding | |
| * to each kind is a member of the CUpti_MetricValue union. The metric | |
| * value returned by \ref cuptiMetricGetValue should be accessed using | |
| * the appropriate member of that union based on its value kind (see | |
| * CUpti_MetricValueKind). | |
| */ | |
| typedef union { | |
| /** | |
| * Value for CUPTI_METRIC_VALUE_KIND_DOUBLE. | |
| */ | |
| double metricValueDouble; | |
| /** | |
| * Value for CUPTI_METRIC_VALUE_KIND_UINT64. | |
| */ | |
| uint64_t metricValueUint64; | |
| /** | |
| * Value for CUPTI_METRIC_VALUE_KIND_INT64. | |
| */ | |
| int64_t metricValueInt64; | |
| /** | |
| * Value for CUPTI_METRIC_VALUE_KIND_PERCENT. For example, 57.5% is | |
| * represented by the value 57.5. | |
| */ | |
| double metricValuePercent; | |
| /** | |
| * Value for CUPTI_METRIC_VALUE_KIND_THROUGHPUT. The unit for | |
| * throughput values is bytes/second. | |
| */ | |
| uint64_t metricValueThroughput; | |
| /** | |
| * Value for CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL. | |
| */ | |
| CUpti_MetricValueUtilizationLevel metricValueUtilizationLevel; | |
| } CUpti_MetricValue; | |
| /** | |
| * \brief Device class. | |
| * | |
| * Enumeration of device classes; these are the values of the metric | |
| * property CUPTI_METRIC_PROPERTY_DEVICE_CLASS. | |
| * | |
| * NOTE(review): unlike the other enums in this header, this one has no | |
| * FORCE_INT enumerator -- confirm that is intentional. | |
| */ | |
| typedef enum { | |
| CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TESLA = 0, | |
| CUPTI_METRIC_PROPERTY_DEVICE_CLASS_QUADRO = 1, | |
| CUPTI_METRIC_PROPERTY_DEVICE_CLASS_GEFORCE = 2, | |
| CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TEGRA = 3, | |
| } CUpti_MetricPropertyDeviceClass; | |
| /** | |
| * \brief Metric device properties. | |
| * | |
| * Metric device properties describe device properties which are needed for a | |
| * metric. Some of these properties can be collected using cuDeviceGetAttribute, | |
| * others using cuptiDeviceGetAttribute or the CUPTI Activity API, as noted on | |
| * each enumerator. The property IDs required by a metric are returned by | |
| * \ref cuptiMetricEnumProperties, and property IDs and values are passed to | |
| * \ref cuptiMetricGetValue2. | |
| */ | |
| typedef enum { | |
| /** | |
| * Number of multiprocessors on a device. This can be collected | |
| * using the value of CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT from | |
| * cuDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_MULTIPROCESSOR_COUNT, | |
| /** | |
| * Maximum number of warps on a multiprocessor. This can be | |
| * collected using the ratio of the values of | |
| * CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR and | |
| * CU_DEVICE_ATTRIBUTE_WARP_SIZE from cuDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_WARPS_PER_MULTIPROCESSOR, | |
| /** | |
| * GPU Time for kernel in ns. This should be profiled using CUPTI | |
| * Activity API. | |
| */ | |
| CUPTI_METRIC_PROPERTY_KERNEL_GPU_TIME, | |
| /** | |
| * Clock rate for device in KHz. This should be collected using | |
| * the value of CU_DEVICE_ATTRIBUTE_CLOCK_RATE from | |
| * cuDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_CLOCK_RATE, | |
| /** | |
| * Number of Frame buffer units for device. This should be collected | |
| * using the value of CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_FRAME_BUFFER_COUNT, | |
| /** | |
| * Global memory bandwidth in KBytes/sec. This should be collected | |
| * using the value of CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH | |
| * from cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_GLOBAL_MEMORY_BANDWIDTH, | |
| /** | |
| * PCIE link rate in Mega bits/sec. This should be collected using | |
| * the value of CUPTI_DEVICE_ATTR_PCIE_LINK_RATE from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_PCIE_LINK_RATE, | |
| /** | |
| * PCIE link width for device. This should be collected using | |
| * the value of CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_PCIE_LINK_WIDTH, | |
| /** | |
| * PCIE generation for device. This should be collected using | |
| * the value of CUPTI_DEVICE_ATTR_PCIE_GEN from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_PCIE_GEN, | |
| /** | |
| * The device class (see CUpti_MetricPropertyDeviceClass). This should | |
| * be collected using the value of CUPTI_DEVICE_ATTR_DEVICE_CLASS from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_DEVICE_CLASS, | |
| /** | |
| * Peak single precision floating point operations that | |
| * can be performed in one cycle by the device. | |
| * This should be collected using the value of | |
| * CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_FLOP_SP_PER_CYCLE, | |
| /** | |
| * Peak double precision floating point operations that | |
| * can be performed in one cycle by the device. | |
| * This should be collected using the value of | |
| * CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_FLOP_DP_PER_CYCLE, | |
| /** | |
| * Number of L2 units on a device. This can be collected | |
| * using the value of CUPTI_DEVICE_ATTR_MAX_L2_UNITS from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_L2_UNITS, | |
| /** | |
| * Whether ECC support is enabled on the device. This can be | |
| * collected using the value of CU_DEVICE_ATTRIBUTE_ECC_ENABLED from | |
| * cuDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_ECC_ENABLED, | |
| /** | |
| * Peak half precision floating point operations that | |
| * can be performed in one cycle by the device. | |
| * This should be collected using the value of | |
| * CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_FLOP_HP_PER_CYCLE, | |
| /** | |
| * NVLINK Bandwidth for device. This should be collected | |
| * using the value of CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW from | |
| * cuptiDeviceGetAttribute. | |
| */ | |
| CUPTI_METRIC_PROPERTY_GPU_CPU_NVLINK_BANDWIDTH, | |
| } CUpti_MetricPropertyID; | |
| /** | |
| * \brief Get the total number of metrics available on any device. | |
| * | |
| * Returns the total number of metrics available on any CUDA-capable | |
| * device. | |
| * | |
| * \param numMetrics Returns the number of metrics | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numMetrics is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics); | |
| /** | |
| * \brief Get all the metrics available on any device. | |
| * | |
| * Returns the metric IDs in \p metricArray for all CUDA-capable | |
| * devices. The size of the \p metricArray buffer is given by \p | |
| * *arraySizeBytes. The size of the \p metricArray buffer must be at | |
| * least numMetrics * sizeof(CUpti_MetricID), where numMetrics is the | |
| * count reported by \ref cuptiGetNumMetrics; otherwise not all metric | |
| * IDs will be returned. The value returned in \p *arraySizeBytes | |
| * contains the number of bytes returned in \p metricArray. | |
| * | |
| * \param arraySizeBytes The size of \p metricArray in bytes, and | |
| * returns the number of bytes written to \p metricArray | |
| * \param metricArray Returns the IDs of the metrics | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or | |
| * \p metricArray are NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, | |
| CUpti_MetricID *metricArray); | |
| /** | |
| * \brief Get the number of metrics for a device. | |
| * | |
| * Returns the number of metrics available for a device. The count can | |
| * be used to size the buffer passed to \ref cuptiDeviceEnumMetrics. | |
| * | |
| * \param device The CUDA device | |
| * \param numMetrics Returns the number of metrics available for the | |
| * device | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_DEVICE | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numMetrics is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, | |
| uint32_t *numMetrics); | |
| /** | |
| * \brief Get the metrics for a device. | |
| * | |
| * Returns the metric IDs in \p metricArray for a device. The size of | |
| * the \p metricArray buffer is given by \p *arraySizeBytes. The size | |
| * of the \p metricArray buffer must be at least numMetrics * | |
| * sizeof(CUpti_MetricID), where numMetrics is the count reported by | |
| * \ref cuptiDeviceGetNumMetrics; otherwise not all metric IDs will be | |
| * returned. The value returned in \p *arraySizeBytes contains the | |
| * number of bytes returned in \p metricArray. | |
| * | |
| * \param device The CUDA device | |
| * \param arraySizeBytes The size of \p metricArray in bytes, and | |
| * returns the number of bytes written to \p metricArray | |
| * \param metricArray Returns the IDs of the metrics for the device | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_DEVICE | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or | |
| * \p metricArray are NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, | |
| size_t *arraySizeBytes, | |
| CUpti_MetricID *metricArray); | |
| /** | |
| * \brief Get a metric attribute. | |
| * | |
| * Returns a metric attribute in \p *value. The size of the \p | |
| * value buffer is given by \p *valueSize. The value returned in \p | |
| * *valueSize contains the number of bytes returned in \p value. The | |
| * type of the value stored in \p value depends on \p attrib; see | |
| * CUpti_MetricAttribute. | |
| * | |
| * If the attribute value is a c-string that is longer than \p | |
| * *valueSize, then only the first \p *valueSize characters will be | |
| * returned and there will be no terminating null byte, so callers | |
| * must not assume the result is null terminated in that case. | |
| * | |
| * \param metric ID of the metric | |
| * \param attrib The metric attribute to read | |
| * \param valueSize The size of the \p value buffer in bytes, and | |
| * returns the number of bytes written to \p value | |
| * \param value Returns the attribute's value | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value | |
| * is NULL, or if \p attrib is not a metric attribute | |
| * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string | |
| * attribute values, indicates that the \p value buffer is too small | |
| * to hold the attribute value. | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, | |
| CUpti_MetricAttribute attrib, | |
| size_t *valueSize, | |
| void *value); | |
| /** | |
| * \brief Find a metric by name. | |
| * | |
| * Find a metric by name and return the metric ID in \p *metric. | |
| * | |
| * \param device The CUDA device | |
| * \param metricName The name of the metric to find | |
| * \param metric Returns the ID of the found metric or undefined if | |
| * unable to find the metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_DEVICE | |
| * \retval CUPTI_ERROR_INVALID_METRIC_NAME if unable to find a metric | |
| * with name \p metricName. In this case \p *metric is undefined | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricName or \p | |
| * metric are NULL. | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, | |
| const char *metricName, | |
| CUpti_MetricID *metric); | |
| /** | |
| * \brief Get number of events required to calculate a metric. | |
| * | |
| * Returns the number of events in \p numEvents that are required to | |
| * calculate a metric. The count can be used to size the buffer passed | |
| * to \ref cuptiMetricEnumEvents. | |
| * | |
| * \param metric ID of the metric | |
| * \param numEvents Returns the number of events required for the metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, | |
| uint32_t *numEvents); | |
| /** | |
| * \brief Get the events required to calculate a metric. | |
| * | |
| * Gets the event IDs in \p eventIdArray required to calculate a \p | |
| * metric. The size of the \p eventIdArray buffer is given by \p | |
| * *eventIdArraySizeBytes and must be at least numEvents * | |
| * sizeof(CUpti_EventID), where numEvents is the count reported by | |
| * \ref cuptiMetricGetNumEvents; otherwise not all events will be | |
| * returned. The value returned in \p *eventIdArraySizeBytes contains | |
| * the number of bytes returned in \p eventIdArray. | |
| * | |
| * \param metric ID of the metric | |
| * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes, | |
| * and returns the number of bytes written to \p eventIdArray | |
| * \param eventIdArray Returns the IDs of the events required to | |
| * calculate \p metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArraySizeBytes or \p | |
| * eventIdArray are NULL. | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricEnumEvents(CUpti_MetricID metric, | |
| size_t *eventIdArraySizeBytes, | |
| CUpti_EventID *eventIdArray); | |
| /** | |
| * \brief Get number of properties required to calculate a metric. | |
| * | |
| * Returns the number of properties in \p numProp that are required to | |
| * calculate a metric. The count can be used to size the buffer passed | |
| * to \ref cuptiMetricEnumProperties. | |
| * | |
| * \param metric ID of the metric | |
| * \param numProp Returns the number of properties required for the | |
| * metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numProp is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, | |
| uint32_t *numProp); | |
| /** | |
| * \brief Get the properties required to calculate a metric. | |
| * | |
| * Gets the property IDs in \p propIdArray required to calculate a \p | |
| * metric. The size of the \p propIdArray buffer is given by \p | |
| * *propIdArraySizeBytes and must be at least numProp * | |
| * sizeof(CUpti_MetricPropertyID), where numProp is the count reported | |
| * by \ref cuptiMetricGetNumProperties; otherwise not all properties | |
| * will be returned. The value returned in \p *propIdArraySizeBytes | |
| * contains the number of bytes returned in \p propIdArray. | |
| * | |
| * \param metric ID of the metric | |
| * \param propIdArraySizeBytes The size of \p propIdArray in bytes, | |
| * and returns the number of bytes written to \p propIdArray | |
| * \param propIdArray Returns the IDs of the properties required to | |
| * calculate \p metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p propIdArraySizeBytes or \p | |
| * propIdArray are NULL. | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricEnumProperties(CUpti_MetricID metric, | |
| size_t *propIdArraySizeBytes, | |
| CUpti_MetricPropertyID *propIdArray); | |
| /** | |
| * \brief For a metric get the groups of events that must be collected | |
| * in the same pass. | |
| * | |
| * For a metric get the groups of events that must be collected in the | |
| * same pass to ensure that the metric is calculated correctly. If the | |
| * events are not collected as specified then the metric value may be | |
| * inaccurate. | |
| * | |
| * \p *eventGroupSets is set to NULL if a metric does not have any | |
| * required event group. In this case the events needed for the metric | |
| * can be grouped in any manner for collection. | |
| * | |
| * \param context The context for event collection | |
| * \param metric The metric ID | |
| * \param eventGroupSets Returns a CUpti_EventGroupSets object that | |
| * indicates the events that must be collected in the same pass to | |
| * ensure the metric is calculated correctly. Returns NULL if no | |
| * grouping is required for metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetRequiredEventGroupSets(CUcontext context, | |
| CUpti_MetricID metric, | |
| CUpti_EventGroupSets **eventGroupSets); | |
| /** | |
| * \brief For a set of metrics, get the grouping that indicates the | |
| * number of passes and the event groups necessary to collect the | |
| * events required for those metrics. | |
| * | |
| * Given the metric IDs in \p metricIdArray, returns in \p | |
| * *eventGroupPasses the grouping that indicates the number of passes | |
| * and the event groups necessary to collect the events required for | |
| * those metrics. | |
| * | |
| * \see cuptiEventGroupSetsCreate for details on event group set | |
| * creation. | |
| * | |
| * \param context The context for event collection | |
| * \param metricIdArraySizeBytes Size of the metricIdArray in bytes | |
| * \param metricIdArray Array of metric IDs | |
| * \param eventGroupPasses Returns a CUpti_EventGroupSets object that | |
| * indicates the number of passes required to collect the events and | |
| * the events to collect on each pass | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_CONTEXT | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricIdArray or | |
| * \p eventGroupPasses is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets(CUcontext context, | |
| size_t metricIdArraySizeBytes, | |
| CUpti_MetricID *metricIdArray, | |
| CUpti_EventGroupSets **eventGroupPasses); | |
| /** | |
| * \brief Calculate the value for a metric. | |
| * | |
| * Use the events collected for a metric to calculate the metric | |
| * value. Metric value evaluation depends on the evaluation mode | |
| * \ref CUpti_MetricEvaluationMode that the metric supports. | |
| * If a metric has evaluation mode CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, | |
| * then it assumes that the input event value is for one domain instance. | |
| * If a metric has evaluation mode CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, | |
| * it assumes that input event values are | |
| * normalized to represent all domain instances on a device. For the | |
| * most accurate metric collection, the events required for the metric | |
| * should be collected for all profiled domain instances. For example, | |
| * to collect all instances of an event, set the | |
| * CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on | |
| * the group containing the event to 1. The normalized value for the | |
| * event is then: (\p sum_event_values * \p totalInstanceCount) / \p | |
| * instanceCount, where \p sum_event_values is the summation of the | |
| * event values across all profiled domain instances, \p | |
| * totalInstanceCount is obtained from querying | |
| * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and \p instanceCount | |
| * is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or | |
| * CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT). | |
| * | |
| * \param device The CUDA device that the metric is being calculated for | |
| * \param metric The metric ID | |
| * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes | |
| * \param eventIdArray The event IDs required to calculate \p metric | |
| * \param eventValueArraySizeBytes The size of \p eventValueArray in bytes | |
| * \param eventValueArray The normalized event values required to | |
| * calculate \p metric. The values must be ordered to match the order of | |
| * events in \p eventIdArray | |
| * \param timeDuration The duration over which the events were | |
| * collected, in ns | |
| * \param metricValue Returns the value for the metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_OPERATION | |
| * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the | |
| * eventIdArray does not contain all the events needed for metric | |
| * \retval CUPTI_ERROR_INVALID_EVENT_VALUE if any of the | |
| * event values required for the metric is CUPTI_EVENT_OVERFLOW | |
| * \retval CUPTI_ERROR_INVALID_METRIC_VALUE if the computed metric value | |
| * cannot be represented in the metric's value type. For example, | |
| * if the metric value type is unsigned and the computed metric value is negative | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricValue, | |
| * \p eventIdArray or \p eventValueArray is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, | |
| CUpti_MetricID metric, | |
| size_t eventIdArraySizeBytes, | |
| CUpti_EventID *eventIdArray, | |
| size_t eventValueArraySizeBytes, | |
| uint64_t *eventValueArray, | |
| uint64_t timeDuration, | |
| CUpti_MetricValue *metricValue); | |
| /** | |
| * \brief Calculate the value for a metric from caller-supplied events | |
| * and properties. | |
| * | |
| * Use the events and properties collected for a metric to calculate | |
| * the metric value. Unlike \ref cuptiMetricGetValue, this function | |
| * takes no device or time duration; the metric properties are passed | |
| * in \p propIdArray and \p propValueArray instead (see | |
| * CUpti_MetricPropertyID and \ref cuptiMetricEnumProperties). | |
| * Metric value evaluation depends on the evaluation | |
| * mode \ref CUpti_MetricEvaluationMode that the metric supports. If | |
| * a metric has evaluation mode | |
| * CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that the | |
| * input event value is for one domain instance. If a metric has | |
| * evaluation mode CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, it | |
| * assumes that input event values are normalized to represent all | |
| * domain instances on a device. For the most accurate metric | |
| * collection, the events required for the metric should be collected | |
| * for all profiled domain instances. For example, to collect all | |
| * instances of an event, set the | |
| * CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on | |
| * the group containing the event to 1. The normalized value for the | |
| * event is then: (\p sum_event_values * \p totalInstanceCount) / \p | |
| * instanceCount, where \p sum_event_values is the summation of the | |
| * event values across all profiled domain instances, \p | |
| * totalInstanceCount is obtained from querying | |
| * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and \p instanceCount | |
| * is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or | |
| * CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT). | |
| * | |
| * \param metric The metric ID | |
| * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes | |
| * \param eventIdArray The event IDs required to calculate \p metric | |
| * \param eventValueArraySizeBytes The size of \p eventValueArray in bytes | |
| * \param eventValueArray The normalized event values required to | |
| * calculate \p metric. The values must be ordered to match the order of | |
| * events in \p eventIdArray | |
| * \param propIdArraySizeBytes The size of \p propIdArray in bytes | |
| * \param propIdArray The metric property IDs required to calculate \p metric | |
| * \param propValueArraySizeBytes The size of \p propValueArray in bytes | |
| * \param propValueArray The metric property values required to | |
| * calculate \p metric. The values must be ordered to match the order of | |
| * metric properties in \p propIdArray | |
| * \param metricValue Returns the value for the metric | |
| * | |
| * \retval CUPTI_SUCCESS | |
| * \retval CUPTI_ERROR_NOT_INITIALIZED | |
| * \retval CUPTI_ERROR_INVALID_METRIC_ID | |
| * \retval CUPTI_ERROR_INVALID_OPERATION | |
| * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the | |
| * eventIdArray does not contain all the events needed for metric | |
| * \retval CUPTI_ERROR_INVALID_EVENT_VALUE if any of the | |
| * event values required for the metric is CUPTI_EVENT_OVERFLOW | |
| * \retval CUPTI_ERROR_NOT_COMPATIBLE if the computed metric value | |
| * cannot be represented in the metric's value type. For example, | |
| * if the metric value type is unsigned and the computed metric value is negative. | |
| * NOTE(review): \ref cuptiMetricGetValue documents | |
| * CUPTI_ERROR_INVALID_METRIC_VALUE for this same condition -- confirm | |
| * which code this function actually returns. | |
| * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricValue, | |
| * \p eventIdArray or \p eventValueArray is NULL | |
| */ | |
| CUptiResult CUPTIAPI cuptiMetricGetValue2(CUpti_MetricID metric, | |
| size_t eventIdArraySizeBytes, | |
| CUpti_EventID *eventIdArray, | |
| size_t eventValueArraySizeBytes, | |
| uint64_t *eventValueArray, | |
| size_t propIdArraySizeBytes, | |
| CUpti_MetricPropertyID *propIdArray, | |
| size_t propValueArraySizeBytes, | |
| uint64_t *propValueArray, | |
| CUpti_MetricValue *metricValue); | |
| /** @} */ /* END CUPTI_METRIC_API */ | |
| } | |