|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <cupti_target.h> |
|
|
#include <cupti_profiler_target.h> |
|
|
#include <nvperf_host.h> |
|
|
#include <cuda.h> |
|
|
#include <cuda_runtime.h> |
|
|
#include <string> |
|
|
#include <cstring> |
|
|
#include <stdio.h> |
|
|
#include <stdlib.h> |
|
|
#include <Metric.h> |
|
|
#include <Eval.h> |
|
|
#include <Utils.h> |
|
|
#include <FileOp.h> |
|
|
|
|
|
#ifndef EXIT_WAIVED |
|
|
#define EXIT_WAIVED 2 |
|
|
#endif |
|
|
|
|
|
#define CUPTI_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
CUptiResult _status = apiFuncCall; \ |
|
|
if (_status != CUPTI_SUCCESS) { \ |
|
|
const char *errstr; \ |
|
|
cuptiGetResultString(_status, &errstr); \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, errstr); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define DRIVER_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
CUresult _status = apiFuncCall; \ |
|
|
if (_status != CUDA_SUCCESS) { \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, _status); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define RUNTIME_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
cudaError_t _status = apiFuncCall; \ |
|
|
if (_status != cudaSuccess) { \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define MEMORY_ALLOCATION_CALL(var) \ |
|
|
do { \ |
|
|
if (var == NULL) { \ |
|
|
fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n", \ |
|
|
__FILE__, __LINE__); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
static int numRanges = 2; |
|
|
static int numNestingLevels = 2; |
|
|
#define DEFAULT_METRIC_NAME "sm__ctas_launched.sum" |
|
|
|
|
|
|
|
|
__global__ |
|
|
void VecAdd(const int* A, const int* B, int* C, int N) |
|
|
{ |
|
|
int i = blockDim.x * blockIdx.x + threadIdx.x; |
|
|
if (i < N) |
|
|
C[i] = A[i] + B[i]; |
|
|
} |
|
|
|
|
|
|
|
|
__global__ |
|
|
void VecSub(const int* A, const int* B, int* C, int N) |
|
|
{ |
|
|
int i = blockDim.x * blockIdx.x + threadIdx.x; |
|
|
if (i < N) |
|
|
C[i] = A[i] - B[i]; |
|
|
} |
|
|
|
|
|
enum class eVectorOperationType |
|
|
{ |
|
|
VEC_ADD, |
|
|
VEC_SUB |
|
|
}; |
|
|
|
|
|
static void initVec(int *vec, int n) |
|
|
{ |
|
|
for (int i=0; i< n; i++) |
|
|
vec[i] = i; |
|
|
} |
|
|
|
|
|
static void cleanUp(int *h_A, int *h_B, int *h_C, int *d_A, int *d_B, int *d_C) |
|
|
{ |
|
|
if (d_A) |
|
|
RUNTIME_API_CALL(cudaFree(d_A)); |
|
|
if (d_B) |
|
|
RUNTIME_API_CALL(cudaFree(d_B)); |
|
|
if (d_C) |
|
|
RUNTIME_API_CALL(cudaFree(d_C)); |
|
|
|
|
|
|
|
|
if (h_A) |
|
|
free(h_A); |
|
|
if (h_B) |
|
|
free(h_B); |
|
|
if (h_C) |
|
|
free(h_C); |
|
|
} |
|
|
|
|
|
static void VectorProcess(int numOfElements, eVectorOperationType operationType) |
|
|
{ |
|
|
size_t size = numOfElements * sizeof(int); |
|
|
int threadsPerBlock = 0; |
|
|
int blocksPerGrid = 0; |
|
|
int *h_A, *h_B, *h_C; |
|
|
int *d_A, *d_B, *d_C; |
|
|
int i, res; |
|
|
|
|
|
|
|
|
h_A = (int*)malloc(size); |
|
|
MEMORY_ALLOCATION_CALL(h_A); |
|
|
h_B = (int*)malloc(size); |
|
|
MEMORY_ALLOCATION_CALL(h_B); |
|
|
h_C = (int*)malloc(size); |
|
|
MEMORY_ALLOCATION_CALL(h_C); |
|
|
|
|
|
|
|
|
initVec(h_A, numOfElements); |
|
|
initVec(h_B, numOfElements); |
|
|
memset(h_C, 0, size); |
|
|
|
|
|
|
|
|
RUNTIME_API_CALL(cudaMalloc((void**)&d_A, size)); |
|
|
RUNTIME_API_CALL(cudaMalloc((void**)&d_B, size)); |
|
|
RUNTIME_API_CALL(cudaMalloc((void**)&d_C, size)); |
|
|
|
|
|
|
|
|
RUNTIME_API_CALL(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice)); |
|
|
RUNTIME_API_CALL(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice)); |
|
|
|
|
|
|
|
|
threadsPerBlock = 256; |
|
|
blocksPerGrid = (numOfElements + threadsPerBlock - 1) / threadsPerBlock; |
|
|
|
|
|
if (operationType == eVectorOperationType::VEC_ADD) |
|
|
{ |
|
|
printf("Launching VecAdd kernel: blocks %d, thread/block %d\n", blocksPerGrid, threadsPerBlock); |
|
|
VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numOfElements); |
|
|
RUNTIME_API_CALL(cudaGetLastError()); |
|
|
} |
|
|
|
|
|
if (operationType == eVectorOperationType::VEC_SUB) |
|
|
{ |
|
|
printf("Launching VecSub kernel: blocks %d, thread/block %d\n", blocksPerGrid, threadsPerBlock); |
|
|
VecSub<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numOfElements); |
|
|
RUNTIME_API_CALL(cudaGetLastError()); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
RUNTIME_API_CALL(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost)); |
|
|
|
|
|
|
|
|
for (i = 0; i < numOfElements; ++i) |
|
|
{ |
|
|
if (operationType == eVectorOperationType::VEC_ADD) |
|
|
res = h_A[i] + h_B[i]; |
|
|
|
|
|
if (operationType == eVectorOperationType::VEC_SUB) |
|
|
res = h_A[i] - h_B[i]; |
|
|
|
|
|
if (h_C[i] != res) |
|
|
{ |
|
|
fprintf(stderr, "error: result verification failed\n"); |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
} |
|
|
|
|
|
cleanUp(h_A, h_B, h_C, d_A, d_B, d_C); |
|
|
} |
|
|
|
|
|
bool CreateCounterDataImage( |
|
|
std::vector<uint8_t>& counterDataImage, |
|
|
std::vector<uint8_t>& counterDataScratchBuffer, |
|
|
std::vector<uint8_t>& counterDataImagePrefix) |
|
|
{ |
|
|
CUpti_Profiler_CounterDataImageOptions counterDataImageOptions; |
|
|
counterDataImageOptions.pCounterDataPrefix = &counterDataImagePrefix[0]; |
|
|
counterDataImageOptions.counterDataPrefixSize = counterDataImagePrefix.size(); |
|
|
counterDataImageOptions.maxNumRanges = numRanges; |
|
|
counterDataImageOptions.maxNumRangeTreeNodes = numRanges; |
|
|
counterDataImageOptions.maxRangeNameLength = 64; |
|
|
|
|
|
CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = {CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE}; |
|
|
calculateSizeParams.pOptions = &counterDataImageOptions; |
|
|
calculateSizeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageCalculateSize(&calculateSizeParams)); |
|
|
|
|
|
CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams = {CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE}; |
|
|
initializeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; |
|
|
initializeParams.pOptions = &counterDataImageOptions; |
|
|
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize; |
|
|
counterDataImage.resize(calculateSizeParams.counterDataImageSize); |
|
|
initializeParams.pCounterDataImage = &counterDataImage[0]; |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageInitialize(&initializeParams)); |
|
|
|
|
|
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = {CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE}; |
|
|
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize; |
|
|
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage; |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageCalculateScratchBufferSize(&scratchBufferSizeParams)); |
|
|
|
|
|
counterDataScratchBuffer.resize(scratchBufferSizeParams.counterDataScratchBufferSize); |
|
|
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = {CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE}; |
|
|
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize; |
|
|
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage; |
|
|
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize; |
|
|
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0]; |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageInitializeScratchBuffer(&initScratchBufferParams)); |
|
|
|
|
|
return true; |
|
|
} |
|
|
|
|
|
bool runTest(std::vector<uint8_t>& configImage, |
|
|
std::vector<uint8_t>& counterDataScratchBuffer, |
|
|
std::vector<uint8_t>& counterDataImage, |
|
|
CUpti_ProfilerReplayMode profilerReplayMode, |
|
|
CUpti_ProfilerRange profilerRange) |
|
|
{ |
|
|
CUcontext cuContext; |
|
|
DRIVER_API_CALL(cuCtxGetCurrent(&cuContext)); |
|
|
|
|
|
CUpti_Profiler_BeginSession_Params beginSessionParams = { CUpti_Profiler_BeginSession_Params_STRUCT_SIZE }; |
|
|
beginSessionParams.ctx = cuContext; |
|
|
beginSessionParams.counterDataImageSize = counterDataImage.size(); |
|
|
beginSessionParams.pCounterDataImage = &counterDataImage[0]; |
|
|
beginSessionParams.counterDataScratchBufferSize = counterDataScratchBuffer.size(); |
|
|
beginSessionParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0]; |
|
|
beginSessionParams.range = profilerRange; |
|
|
beginSessionParams.replayMode = profilerReplayMode; |
|
|
beginSessionParams.maxRangesPerPass = numRanges; |
|
|
beginSessionParams.maxLaunchesPerPass = numRanges; |
|
|
CUPTI_API_CALL(cuptiProfilerBeginSession(&beginSessionParams)); |
|
|
|
|
|
CUpti_Profiler_SetConfig_Params setConfigParams = { CUpti_Profiler_SetConfig_Params_STRUCT_SIZE }; |
|
|
setConfigParams.pConfig = &configImage[0]; |
|
|
setConfigParams.configSize = configImage.size(); |
|
|
setConfigParams.passIndex = 0; |
|
|
setConfigParams.minNestingLevel = 1; |
|
|
setConfigParams.numNestingLevels = numNestingLevels; |
|
|
CUPTI_API_CALL(cuptiProfilerSetConfig(&setConfigParams)); |
|
|
|
|
|
|
|
|
CUpti_Profiler_BeginPass_Params beginPassParams = { CUpti_Profiler_BeginPass_Params_STRUCT_SIZE }; |
|
|
CUpti_Profiler_EndPass_Params endPassParams = { CUpti_Profiler_EndPass_Params_STRUCT_SIZE }; |
|
|
do |
|
|
{ |
|
|
CUPTI_API_CALL(cuptiProfilerBeginPass(&beginPassParams)); |
|
|
{ |
|
|
CUpti_Profiler_EnableProfiling_Params enableProfilingParams = { CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE }; |
|
|
CUPTI_API_CALL(cuptiProfilerEnableProfiling(&enableProfilingParams)); |
|
|
|
|
|
CUpti_Profiler_PushRange_Params pushRangeParams = { CUpti_Profiler_PushRange_Params_STRUCT_SIZE }; |
|
|
pushRangeParams.pRangeName = "userRangeA"; |
|
|
printf("\nStart of userRangeA\n"); |
|
|
CUPTI_API_CALL(cuptiProfilerPushRange(&pushRangeParams)); |
|
|
{ |
|
|
VectorProcess(50000, eVectorOperationType::VEC_ADD); |
|
|
|
|
|
pushRangeParams.pRangeName = "userRangeB"; |
|
|
printf("Start of userRangeB\n"); |
|
|
CUPTI_API_CALL(cuptiProfilerPushRange(&pushRangeParams)); |
|
|
{ |
|
|
VectorProcess(10000, eVectorOperationType::VEC_SUB); |
|
|
} |
|
|
CUpti_Profiler_PopRange_Params popRangeParams = { CUpti_Profiler_PopRange_Params_STRUCT_SIZE }; |
|
|
printf("End of userRangeB\n"); |
|
|
CUPTI_API_CALL(cuptiProfilerPopRange(&popRangeParams)); |
|
|
|
|
|
} |
|
|
CUpti_Profiler_PopRange_Params popRangeParams = { CUpti_Profiler_PopRange_Params_STRUCT_SIZE }; |
|
|
printf("End of userRangeA\n"); |
|
|
CUPTI_API_CALL(cuptiProfilerPopRange(&popRangeParams)); |
|
|
|
|
|
CUpti_Profiler_DisableProfiling_Params disableProfilingParams = { CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE }; |
|
|
CUPTI_API_CALL(cuptiProfilerDisableProfiling(&disableProfilingParams)); |
|
|
} |
|
|
CUPTI_API_CALL(cuptiProfilerEndPass(&endPassParams)); |
|
|
} while (!endPassParams.allPassesSubmitted); |
|
|
|
|
|
CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE}; |
|
|
CUPTI_API_CALL(cuptiProfilerFlushCounterData(&flushCounterDataParams)); |
|
|
|
|
|
CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE}; |
|
|
CUPTI_API_CALL(cuptiProfilerUnsetConfig(&unsetConfigParams)); |
|
|
|
|
|
CUpti_Profiler_EndSession_Params endSessionParams = {CUpti_Profiler_EndSession_Params_STRUCT_SIZE}; |
|
|
CUPTI_API_CALL(cuptiProfilerEndSession(&endSessionParams)); |
|
|
|
|
|
return true; |
|
|
} |
|
|
|
|
|
int main(int argc, char* argv[]) |
|
|
{ |
|
|
CUdevice cuDevice; |
|
|
std::vector<std::string> metricNames; |
|
|
std::vector<uint8_t> counterDataImagePrefix; |
|
|
std::vector<uint8_t> configImage; |
|
|
std::vector<uint8_t> counterDataImage; |
|
|
std::vector<uint8_t> counterDataScratchBuffer; |
|
|
std::vector<uint8_t> counterAvailabilityImage; |
|
|
std::string CounterDataFileName("SimpleCupti.counterdata"); |
|
|
std::string CounterDataSBFileName("SimpleCupti.counterdataSB"); |
|
|
CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_UserReplay; |
|
|
CUpti_ProfilerRange profilerRange = CUPTI_UserRange; |
|
|
char* metricName; |
|
|
int deviceCount, deviceNum; |
|
|
int computeCapabilityMajor = 0, computeCapabilityMinor = 0; |
|
|
|
|
|
printf("Usage: %s [device_num] [metric_names comma separated]\n", argv[0]); |
|
|
|
|
|
DRIVER_API_CALL(cuInit(0)); |
|
|
DRIVER_API_CALL(cuDeviceGetCount(&deviceCount)); |
|
|
|
|
|
if (deviceCount == 0) |
|
|
{ |
|
|
printf("There is no device supporting CUDA.\n"); |
|
|
exit(EXIT_WAIVED); |
|
|
} |
|
|
|
|
|
if (argc > 1) |
|
|
deviceNum = atoi(argv[1]); |
|
|
else |
|
|
deviceNum = 0; |
|
|
printf("CUDA Device Number: %d\n", deviceNum); |
|
|
|
|
|
DRIVER_API_CALL(cuDeviceGet(&cuDevice, deviceNum)); |
|
|
DRIVER_API_CALL(cuDeviceGetAttribute(&computeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice)); |
|
|
DRIVER_API_CALL(cuDeviceGetAttribute(&computeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice)); |
|
|
|
|
|
printf("Compute Capability of Device: %d.%d\n", computeCapabilityMajor,computeCapabilityMinor); |
|
|
|
|
|
|
|
|
CUpti_Profiler_Initialize_Params profilerInitializeParams = { CUpti_Profiler_Initialize_Params_STRUCT_SIZE }; |
|
|
CUPTI_API_CALL(cuptiProfilerInitialize(&profilerInitializeParams)); |
|
|
CUpti_Profiler_DeviceSupported_Params params = { CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE }; |
|
|
params.cuDevice = deviceNum; |
|
|
CUPTI_API_CALL(cuptiProfilerDeviceSupported(¶ms)); |
|
|
|
|
|
if (params.isSupported != CUPTI_PROFILER_CONFIGURATION_SUPPORTED) |
|
|
{ |
|
|
::std::cerr << "Unable to profile on device " << deviceNum << ::std::endl; |
|
|
|
|
|
if (params.architecture == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
::std::cerr << "\tdevice architecture is not supported" << ::std::endl; |
|
|
} |
|
|
|
|
|
if (params.sli == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
::std::cerr << "\tdevice sli configuration is not supported" << ::std::endl; |
|
|
} |
|
|
|
|
|
if (params.vGpu == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
::std::cerr << "\tdevice vgpu configuration is not supported" << ::std::endl; |
|
|
} |
|
|
else if (params.vGpu == CUPTI_PROFILER_CONFIGURATION_DISABLED) |
|
|
{ |
|
|
::std::cerr << "\tdevice vgpu configuration disabled profiling support" << ::std::endl; |
|
|
} |
|
|
|
|
|
if (params.confidentialCompute == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
::std::cerr << "\tdevice confidential compute configuration is not supported" << ::std::endl; |
|
|
} |
|
|
|
|
|
if (params.cmp == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
::std::cerr << "\tNVIDIA Crypto Mining Processors (CMP) are not supported" << ::std::endl; |
|
|
} |
|
|
exit(EXIT_WAIVED); |
|
|
} |
|
|
|
|
|
|
|
|
if (argc > 2) |
|
|
{ |
|
|
metricName = strtok(argv[2], ","); |
|
|
while(metricName != NULL) |
|
|
{ |
|
|
metricNames.push_back(metricName); |
|
|
metricName = strtok(NULL, ","); |
|
|
} |
|
|
} |
|
|
else { |
|
|
metricNames.push_back(DEFAULT_METRIC_NAME); |
|
|
} |
|
|
|
|
|
CUcontext cuContext; |
|
|
DRIVER_API_CALL(cuCtxCreate(&cuContext, 0, cuDevice)); |
|
|
|
|
|
|
|
|
CUpti_Device_GetChipName_Params getChipNameParams = { CUpti_Device_GetChipName_Params_STRUCT_SIZE }; |
|
|
getChipNameParams.deviceIndex = deviceNum; |
|
|
CUPTI_API_CALL(cuptiDeviceGetChipName(&getChipNameParams)); |
|
|
std::string chipName(getChipNameParams.pChipName); |
|
|
|
|
|
CUpti_Profiler_GetCounterAvailability_Params getCounterAvailabilityParams = { CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE }; |
|
|
getCounterAvailabilityParams.ctx = cuContext; |
|
|
CUPTI_API_CALL(cuptiProfilerGetCounterAvailability(&getCounterAvailabilityParams)); |
|
|
|
|
|
counterAvailabilityImage.clear(); |
|
|
counterAvailabilityImage.resize(getCounterAvailabilityParams.counterAvailabilityImageSize); |
|
|
getCounterAvailabilityParams.pCounterAvailabilityImage = counterAvailabilityImage.data(); |
|
|
CUPTI_API_CALL(cuptiProfilerGetCounterAvailability(&getCounterAvailabilityParams)); |
|
|
|
|
|
|
|
|
NVPW_InitializeHost_Params initializeHostParams = { NVPW_InitializeHost_Params_STRUCT_SIZE }; |
|
|
RETURN_IF_NVPW_ERROR(0, NVPW_InitializeHost(&initializeHostParams)); |
|
|
|
|
|
if (metricNames.size()) |
|
|
{ |
|
|
if (!NV::Metric::Config::GetConfigImage(chipName, metricNames, configImage, counterAvailabilityImage.data())) |
|
|
{ |
|
|
std::cerr << "Failed to create configImage" << std::endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
if (!NV::Metric::Config::GetCounterDataPrefixImage(chipName, metricNames, counterDataImagePrefix)) |
|
|
{ |
|
|
std::cerr << "Failed to create counterDataImagePrefix" << std::endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
} |
|
|
else |
|
|
{ |
|
|
std::cerr << "No metrics provided to profile" << std::endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
if (!CreateCounterDataImage(counterDataImage, counterDataScratchBuffer, counterDataImagePrefix)) |
|
|
{ |
|
|
std::cerr << "Failed to create counterDataImage" << std::endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
if (!runTest(configImage, counterDataScratchBuffer, counterDataImage, profilerReplayMode, profilerRange)) |
|
|
{ |
|
|
std::cerr << "Failed to run sample" << std::endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
CUpti_Profiler_DeInitialize_Params profilerDeInitializeParams = {CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE}; |
|
|
CUPTI_API_CALL(cuptiProfilerDeInitialize(&profilerDeInitializeParams)); |
|
|
DRIVER_API_CALL(cuCtxDestroy(cuContext)); |
|
|
|
|
|
|
|
|
WriteBinaryFile(CounterDataFileName.c_str(), counterDataImage); |
|
|
WriteBinaryFile(CounterDataSBFileName.c_str(), counterDataScratchBuffer); |
|
|
|
|
|
|
|
|
NV::Metric::Eval::PrintMetricValues(chipName, counterDataImage, metricNames); |
|
|
exit(EXIT_SUCCESS); |
|
|
} |
|
|
|