/*
 * Helper definitions for the pc_sampling_utility command-line tool: error-checking
 * macros, tool-wide option/state variables, usage and argument handling, retrieval
 * of PC sampling data through CuptiUtilGetPcSampData, and printing of PC records
 * with SASS-to-source lookup through cuptiGetSassToSourceCorrelation.
 *
 * NOTE(review): this copy of the header looks damaged by text extraction. The
 * original line structure is collapsed, and every span that sat between angle
 * brackets appears to be missing: the #include targets, the template arguments
 * of std::vector / std::map, the body of the argument-parsing loop, the start of
 * the function(s) that follow it, and part of the tail of the last function.
 * Restore those spans from the original file before trying to compile; the
 * comments below describe only what is still visible.
 */
#if !defined(_PC_SAMPLING_UTILITY_HELPER_H_) #define _PC_SAMPLING_UTILITY_HELPER_H_ #include #include #include #include #include #include #include #include #include /* NOTE(review): a using-directive at header scope is visible to every file that includes this header. */ using namespace CUPTI::PcSamplingUtil; /* EXIT_WAIVED defaults to 2 unless it was already defined. */ #ifndef EXIT_WAIVED #define EXIT_WAIVED 2 #endif /* stricmp maps to _stricmp when WIN32 or _WIN32 is defined and to strcasecmp otherwise. */ #if defined(WIN32) || defined(_WIN32) #define stricmp _stricmp #else #define stricmp strcasecmp #endif /* CUPTI_CALL(call): evaluates a call returning CUptiResult; on any status other than CUPTI_SUCCESS it prints file, line, the call text and the string from cuptiGetResultString to stderr, then exits with EXIT_FAILURE. Defined only if not already defined. */ #ifndef CUPTI_CALL #define CUPTI_CALL(call) \ { \ CUptiResult _status = call; \ if (_status != CUPTI_SUCCESS) \ { \ const char* errstr; \ cuptiGetResultString(_status, &errstr); \ fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ __FILE__, \ __LINE__, \ #call, \ errstr); \ exit(EXIT_FAILURE); \ } \ } #endif /* CUPTI_UTIL_CALL(call): same pattern for CUptiUtilResult; the failing status is printed as an integer. */ #define CUPTI_UTIL_CALL(call) \ { \ CUptiUtilResult _status = call; \ if (_status != CUPTI_UTIL_SUCCESS) \ { \ fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \ __FILE__, \ __LINE__, \ #call, \ _status); \ exit(EXIT_FAILURE); \ } \ } /* MEMORY_ALLOCATION_CALL(var): prints an error to stderr and exits with EXIT_FAILURE when var is NULL. */ #define MEMORY_ALLOCATION_CALL(var) \ do { \ if (var == NULL) \ { \ fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n", \ __FILE__, __LINE__); \ exit(EXIT_FAILURE); \ } \ } while (0) /* ModuleDetails: size and pointer of one cubin image; presumably the mapped type of crcModuleMap, whose entries are read below via itr->second.cubinImage and itr->second.cubinSize. */ typedef struct moduleDetails { uint32_t cubinSize; void* cubinImage; } ModuleDetails; /* Tool-wide state. NOTE(review): these are non-inline definitions in a header, so including it from more than one translation unit would give duplicate definitions; confirm it is included only once. */ std::string fileName; PcSamplingStallReasons pcSamplingStallReasonsRetrieve; std::vector buffersRetrievedDataVector; std::map crcModuleMap; CUpti_PCSamplingCollectionMode collectionMode; bool disableMerge; bool disablePcInfoPrints ; bool disableSourceCorrelation; bool verbose; /* Init: resets fileName, pcSamplingStallReasonsRetrieve, collectionMode (continuous) and the four boolean flags (all false). It does not touch buffersRetrievedDataVector or crcModuleMap. */ static void Init() { fileName = ""; pcSamplingStallReasonsRetrieve = {}; collectionMode = CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS; disableMerge = false; disablePcInfoPrints = false; disableSourceCorrelation = false; verbose = false; } /* PrintUsage: prints the option summary with printf and then calls exit(EXIT_SUCCESS), so it does not return to the caller. */ static void PrintUsage() { printf("usage: pc_sampling_utility\n"); printf(" --help : displays help message\n"); printf(" --file-name : Name of the file to parse and print data\n"); printf(" --disable-merge : Disable merge of buffers\n"); printf(" 
--disable-pc-info-prints : Disable PC records info prints\n"); printf(" --disable-source-correlation : Disable Source correlation\n"); printf(" --verbose : Enable verbose prints\n"); exit(EXIT_SUCCESS); } /* ParseCommandLineArgs: when argc is below 2 it prints a hint and calls PrintUsage, which exits the process. The per-argument loop that follows is truncated in this copy. */ static void ParseCommandLineArgs(int argc, char *argv[]) { if (argc < 2) { std::cout << "Pass file to parse." << std::endl; PrintUsage(); } for (int i=1; i /* NOTE(review): text is missing here. The rest of this loop, the end of ParseCommandLineArgs and the beginning of the next function are not present; presumably that function declares fileHandler, getBufferInfoParams, buffersRereivedData and the get* attribute entries used below. */ pcSamplingConfigurationInfoRetrieve; pcSamplingConfigurationInfoRetrieve.push_back(getSampPeriod); pcSamplingConfigurationInfoRetrieve.push_back(getStallReason); pcSamplingConfigurationInfoRetrieve.push_back(getScratchBufferSize); pcSamplingConfigurationInfoRetrieve.push_back(getHwBufferSize); pcSamplingConfigurationInfoRetrieve.push_back(getCollectionMode); pcSamplingConfigurationInfoRetrieve.push_back(getEnableStartStop); pcSamplingConfigurationInfoRetrieve.push_back(getOutputDataFormat); /* Fill the request for CuptiUtilGetPcSampData: PC-to-counter buffer type, the seven configuration attributes pushed above and the stall-reason table; CUPTI_UTIL_CALL exits the process on failure. */ CUptiUtil_GetPcSampDataParams getPcSampDataParams = {}; getPcSampDataParams.size = CUptiUtil_GetPcSampDataParamsSize; getPcSampDataParams.fileHandler = &fileHandler; getPcSampDataParams.bufferType = PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA; getPcSampDataParams.pBufferInfoData = &getBufferInfoParams.bufferInfoData; getPcSampDataParams.pSamplingData = (void*)&buffersRereivedData; getPcSampDataParams.numAttributes = pcSamplingConfigurationInfoRetrieve.size(); getPcSampDataParams.pPCSamplingConfigurationInfo = pcSamplingConfigurationInfoRetrieve.data(); getPcSampDataParams.pPcSamplingStallReasons = &pcSamplingStallReasonsRetrieve; CUPTI_UTIL_CALL(CuptiUtilGetPcSampData(&getPcSampDataParams)); /* NOTE(review): more text is missing inside the next loop header, up to the iterator declaration of the record-printing code; pPcSampDataBuffer and numPcSampDataBuffer used below are presumably parameters of that missing function header. */ for (size_t i=0; i::iterator itr; /* Counters: PCs with no cubin in crcModuleMap, and PCs whose source correlation call did not succeed. */ size_t numPcNoCubin = 0; size_t numPcNoLineinfo = 0; /* One pass per PC sampling buffer: a banner and a summary line are always printed; per-record lines depend on disablePcInfoPrints. */ for (size_t pcSampBufferIndex = 0; pcSampBufferIndex < numPcSampDataBuffer; pcSampBufferIndex++) { std::cout << "========================== PC Records Buffer Info ==========================" << std::endl; std::cout << "Buffer Number: " << pcSampBufferIndex + 1 << ", Range Id: " << pPcSampDataBuffer[pcSampBufferIndex].rangeId << ", Count of PC records: " << 
pPcSampDataBuffer[pcSampBufferIndex].totalNumPcs << ", Total Samples: " << pPcSampDataBuffer[pcSampBufferIndex].totalSamples << ", Total Dropped Samples: " << pPcSampDataBuffer[pcSampBufferIndex].droppedSamples; /* nonUsrKernelsTotalSamples is printed only when CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS reports the field for the size value stored with this buffer. */ if (CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(CUpti_PCSamplingData, nonUsrKernelsTotalSamples, buffersRetrievedDataVector[pcSampBufferIndex].size)) { std::cout << ", Non User Kernels Total Samples: " << buffersRetrievedDataVector[pcSampBufferIndex].nonUsrKernelsTotalSamples; } std::cout << std::endl; /* For each PC record: look up its cubinCrc in crcModuleMap; when absent, count it in numPcNoCubin, print the record with ERROR_NO_CUBIN as the file name (if prints are enabled) and continue with the next record. */ for(size_t i=0 ; i < pPcSampDataBuffer[pcSampBufferIndex].totalNumPcs; i++) { // find matching cubinCrc entry in map itr = crcModuleMap.find(pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].cubinCrc); if (itr == crcModuleMap.end()) { numPcNoCubin++; if (!disablePcInfoPrints) { std::cout << "functionName: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionName << ", functionIndex: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionIndex << ", pcOffset: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].pcOffset << ", lineNumber:0" << ", fileName: " << "ERROR_NO_CUBIN" << ", dirName: " << ", stallReasonCount: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReasonCount; for (size_t k=0; k < pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReasonCount; k++) { std::cout << ", " << GetStallReason(pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReason[k].pcSamplingStallReasonIndex) << ": " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReason[k].samples; } std::cout << std::endl; } continue; } if (!disablePcInfoPrints) { std::cout << "functionName: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionName << ", functionIndex: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionIndex << ", pcOffset: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].pcOffset; } CUpti_GetSassToSourceCorrelationParams pCSamplingGetSassToSourceCorrelationParams = {0}; pCSamplingGetSassToSourceCorrelationParams.size = 
CUpti_GetSassToSourceCorrelationParamsSize; pCSamplingGetSassToSourceCorrelationParams.functionName = pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionName; pCSamplingGetSassToSourceCorrelationParams.pcOffset = pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].pcOffset; pCSamplingGetSassToSourceCorrelationParams.cubin = itr->second.cubinImage; pCSamplingGetSassToSourceCorrelationParams.cubinSize = itr->second.cubinSize; /* Ask CUPTI for the source location of this PC using the cubin image found for its CRC; the result is inspected rather than wrapped in CUPTI_CALL, so a failure does not exit. */ CUptiResult cuptiResult = cuptiGetSassToSourceCorrelation(&pCSamplingGetSassToSourceCorrelationParams); /* NOTE(review): fileName/dirName are freed, and numPcNoLineinfo is incremented, only inside this !disablePcInfoPrints branch; with prints disabled the strings from a successful lookup are never freed here and failed lookups are not counted. Confirm whether that is intended. */ if (!disablePcInfoPrints) { if (cuptiResult == CUPTI_SUCCESS) { std::cout << ", lineNumber: " << pCSamplingGetSassToSourceCorrelationParams.lineNumber << ", fileName: " << pCSamplingGetSassToSourceCorrelationParams.fileName << ", dirName: " << pCSamplingGetSassToSourceCorrelationParams.dirName; free(pCSamplingGetSassToSourceCorrelationParams.fileName); free(pCSamplingGetSassToSourceCorrelationParams.dirName); } else { // It is possible that extracted cubins does not have lineinfo. // It is recommended to build application/libraries with nvcc option lineinfo. numPcNoLineinfo++; std::cout << ", lineNumber: 0" << ", fileName: " << "ERROR_NO_LINEINFO" << ", dirName: "; } std::cout << ", stallReasonCount: " <second.cubinImage); } } #endif