| | #if !defined(_PC_SAMPLING_UTILITY_HELPER_H_) |
| | #define _PC_SAMPLING_UTILITY_HELPER_H_ |
| |
|
| | #include <cupti_pcsampling_util.h> |
| | #include <cupti_pcsampling.h> |
| | #include <iostream> |
| | #include <fstream> |
| | #include <string> |
| | #include <vector> |
| | #include <map> |
| | #include <string.h> |
| | #include <stdlib.h> |
| |
|
| | using namespace CUPTI::PcSamplingUtil; |
| |
|
// Exit code 2, defined here only when the including file has not already
// provided its own EXIT_WAIVED.
#if !defined(EXIT_WAIVED)
#  define EXIT_WAIVED 2
#endif
| |
|
// Case-insensitive C-string comparison under a single name: Windows builds map
// stricmp to _stricmp, every other platform maps it to strcasecmp.
#if defined(WIN32) || defined(_WIN32)
    #define stricmp _stricmp
#else
    // POSIX declares strcasecmp in <strings.h>; relying on <string.h> alone
    // only works on C libraries that expose it there as an extension.
    #include <strings.h>
    #define stricmp strcasecmp
#endif
| |
|
// Evaluates a CUPTI API call once; on any result other than CUPTI_SUCCESS it
// prints file, line, the call text and the CUPTI error string to stderr and
// terminates the process with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so that `CUPTI_CALL(x);` is a
// single statement and is safe inside an unbraced if/else. The error string is
// pre-initialized so the message stays printable even if
// cuptiGetResultString() does not fill it in.
#ifndef CUPTI_CALL
#define CUPTI_CALL(call)                                                       \
    do {                                                                       \
        CUptiResult _status = (call);                                          \
        if (_status != CUPTI_SUCCESS)                                          \
        {                                                                      \
            const char* errstr = "<unknown error>";                            \
            cuptiGetResultString(_status, &errstr);                            \
            fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \
                    __FILE__,                                                  \
                    __LINE__,                                                  \
                    #call,                                                     \
                    errstr);                                                   \
            exit(EXIT_FAILURE);                                                \
        }                                                                      \
    } while (0)
#endif
| |
|
// Evaluates a CUPTI PC sampling utility call once; on any result other than
// CUPTI_UTIL_SUCCESS it prints file, line, the call text and the numeric
// result code to stderr and terminates the process with EXIT_FAILURE.
//
// Wrapped in do { ... } while (0) so that `CUPTI_UTIL_CALL(x);` behaves as one
// statement inside an unbraced if/else. The result is cast to int to match
// the %d conversion.
#define CUPTI_UTIL_CALL(call)                                                  \
    do {                                                                       \
        CUptiUtilResult _status = (call);                                      \
        if (_status != CUPTI_UTIL_SUCCESS)                                     \
        {                                                                      \
            fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \
                    __FILE__,                                                  \
                    __LINE__,                                                  \
                    #call,                                                     \
                    static_cast<int>(_status));                                \
            exit(EXIT_FAILURE);                                                \
        }                                                                      \
    } while (0)
| |
|
// Aborts the process with EXIT_FAILURE (after reporting file and line on
// stderr) when the given pointer expression is NULL.
//
// The argument is parenthesized so that compound expressions, for example a
// conditional expression, are compared against NULL as a whole.
#define MEMORY_ALLOCATION_CALL(var)                                            \
    do {                                                                       \
        if ((var) == NULL)                                                     \
        {                                                                      \
            fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n",       \
                    __FILE__, __LINE__);                                       \
            exit(EXIT_FAILURE);                                                \
        }                                                                      \
    } while (0)
| |
|
// One cubin image held in memory.
struct moduleDetails
{
    uint32_t cubinSize;   // size of the image in bytes
    void*    cubinImage;  // heap buffer holding the image contents
};
typedef struct moduleDetails ModuleDetails;
| |
|
| | std::string fileName; |
| | PcSamplingStallReasons pcSamplingStallReasonsRetrieve; |
| | std::vector<CUpti_PCSamplingData> buffersRetrievedDataVector; |
| | std::map<uint64_t, ModuleDetails> crcModuleMap; |
| | CUpti_PCSamplingCollectionMode collectionMode; |
| |
|
| | bool disableMerge; |
| | bool disablePcInfoPrints ; |
| | bool disableSourceCorrelation; |
| | bool verbose; |
| |
|
| | static void Init() |
| | { |
| | fileName = ""; |
| | pcSamplingStallReasonsRetrieve = {}; |
| | collectionMode = CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS; |
| |
|
| | disableMerge = false; |
| | disablePcInfoPrints = false; |
| | disableSourceCorrelation = false; |
| | verbose = false; |
| | } |
| |
|
// Prints the command line help to stdout and terminates the process with
// EXIT_SUCCESS. This function never returns.
static void PrintUsage()
{
    static const char* const usageLines[] = {
        "usage: pc_sampling_utility\n",
        " --help : displays help message\n",
        " --file-name : Name of the file to parse and print data\n",
        " --disable-merge : Disable merge of buffers\n",
        " --disable-pc-info-prints : Disable PC records info prints\n",
        " --disable-source-correlation : Disable Source correlation\n",
        " --verbose : Enable verbose prints\n",
    };

    const size_t lineCount = sizeof(usageLines) / sizeof(usageLines[0]);
    for (size_t line = 0; line < lineCount; ++line)
    {
        fputs(usageLines[line], stdout);
    }

    exit(EXIT_SUCCESS);
}
| |
|
| | static void ParseCommandLineArgs(int argc, char *argv[]) |
| | { |
| | if (argc < 2) |
| | { |
| | std::cout << "Pass file to parse." << std::endl; |
| | PrintUsage(); |
| | } |
| |
|
| | for (int i=1; i<argc; i++) |
| | { |
| | if ((stricmp(argv[i], "--help") == 0) || (stricmp(argv[i], "-help") == 0)) |
| | { |
| | PrintUsage(); |
| | } |
| | else if ((stricmp(argv[i], "--file-name") == 0) || (stricmp(argv[i], "-file-name") == 0)) |
| | { |
| | if (argc < i+2) |
| | { |
| | std::cout << "ERROR : Pass file to parse." << std::endl; |
| | PrintUsage(); |
| | } |
| | fileName = argv[i+1]; |
| | i++; |
| | } |
| | else if ((stricmp(argv[i], "--disable-merge") == 0) ||(stricmp(argv[i], "-disable-merge") == 0)) |
| | { |
| | disableMerge = true; |
| | } |
| | else if ((stricmp(argv[i], "--disable-pc-info-prints") == 0) ||(stricmp(argv[i], "-disable-pc-info-prints") == 0)) |
| | { |
| | disablePcInfoPrints = true; |
| | } |
| | else if ((stricmp(argv[i], "--disable-source-correlation") == 0) ||(stricmp(argv[i], "-disable-source-correlation") == 0)) |
| | { |
| | disableSourceCorrelation = true; |
| | } |
| | else if ((stricmp(argv[i], "--verbose") == 0) ||(stricmp(argv[i], "-verbose") == 0)) |
| | { |
| | verbose = true; |
| | } |
| | else |
| | { |
| | std::cout << "Unknown option : " << argv[i] << std::endl; |
| | PrintUsage(); |
| | } |
| |
|
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | static std::string GetStallReason(uint32_t pcSamplingStallReasonIndex) |
| | { |
| | for (size_t i = 0; i < pcSamplingStallReasonsRetrieve.numStallReasons; i++) |
| | { |
| | if (pcSamplingStallReasonsRetrieve.stallReasonIndex[i] == pcSamplingStallReasonIndex) |
| | { |
| | return pcSamplingStallReasonsRetrieve.stallReasons[i]; |
| | } |
| | } |
| | return "ERROR_STALL_REASON_INDEX_NOT_FOUND"; |
| | } |
| |
|
| | static void PrintConfigurationDetails(CUptiUtil_GetPcSampDataParams &getPcSampDataParams) |
| | { |
| | std::cout << "========================== Configuration info ==========================" << std::endl; |
| |
|
| | for (size_t i=0; i<getPcSampDataParams.numAttributes; i++) |
| | { |
| | switch (getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeType) |
| | { |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD: |
| | std::cout << "sampling period: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.samplingPeriodData.samplingPeriod << std::endl; |
| | break; |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON: |
| | { |
| | std::cout << "selected stall reasons count: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.stallReasonData.stallReasonCount << std::endl; |
| | std::cout << "selected stall reasons: " << std::endl; |
| | for (size_t stallReasonIndex=0; stallReasonIndex < getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.stallReasonData.stallReasonCount; stallReasonIndex++) |
| | { |
| | std::cout << GetStallReason(getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.stallReasonData.pStallReasonIndex[stallReasonIndex]) << ", "; |
| | if ((stallReasonIndex+1) % 5 == 0) |
| | std::cout << std::endl; |
| | } |
| | std::cout << std::endl; |
| | break; |
| | } |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE: |
| | std::cout << "scratch buffer size: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.scratchBufferSizeData.scratchBufferSize << std::endl; |
| | break; |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE: |
| | std::cout << "hw buffer size: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.hardwareBufferSizeData.hardwareBufferSize << std::endl; |
| | break; |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE: |
| | std::cout << "collection mode: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.collectionModeData.collectionMode << std::endl; |
| | break; |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL: |
| | std::cout << "enable start stop: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.enableStartStopControlData.enableStartStopControl << std::endl; |
| | break; |
| | case CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT: |
| | std::cout << "output data format: " << getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.outputDataFormatData.outputDataFormat << std::endl; |
| | break; |
| | default: |
| | break; |
| | } |
| | } |
| | std::cout << "========================================================================" << std::endl; |
| | } |
| |
|
| | static void PrintRetrievedPcSampData() |
| | { |
| | for (size_t pcSampBufferIndex = 0; pcSampBufferIndex < buffersRetrievedDataVector.size(); pcSampBufferIndex++) |
| | { |
| | std::cout << "========================== PC Records Buffer Info ==========================" << std::endl; |
| | std::cout << "Buffer Number: " << pcSampBufferIndex + 1 |
| | << ", Range Id: " << buffersRetrievedDataVector[pcSampBufferIndex].rangeId |
| | << ", Count of PC records: " << buffersRetrievedDataVector[pcSampBufferIndex].totalNumPcs |
| | << ", Total Samples: " << buffersRetrievedDataVector[pcSampBufferIndex].totalSamples |
| | << ", Total Dropped Samples: " << buffersRetrievedDataVector[pcSampBufferIndex].droppedSamples; |
| | if (CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(CUpti_PCSamplingData, nonUsrKernelsTotalSamples, buffersRetrievedDataVector[pcSampBufferIndex].size)) |
| | { |
| | std::cout << ", Non User Kernels Total Samples: " << buffersRetrievedDataVector[pcSampBufferIndex].nonUsrKernelsTotalSamples; |
| | } |
| | std::cout << std::endl; |
| |
|
| | for(size_t i=0 ; i < buffersRetrievedDataVector[pcSampBufferIndex].totalNumPcs; i++) |
| | { |
| | std::cout << ", cubinCrc: " << buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].cubinCrc |
| | << ", functionName: " << buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].functionName |
| | << ", functionIndex: " << buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].functionIndex |
| | << ", pcOffset: " << buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].pcOffset |
| | << ", stallReasonCount: " << buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].stallReasonCount; |
| |
|
| | for (size_t k=0; k < buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].stallReasonCount; k++) |
| | { |
| | std::cout << ", " << GetStallReason(buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].stallReason[k].pcSamplingStallReasonIndex) |
| | << ": " << buffersRetrievedDataVector[pcSampBufferIndex].pPcData[i].stallReason[k].samples; |
| | } |
| | std::cout << std::endl; |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | static void RetrievePcSampData() |
| | { |
| | std::ifstream fileHandler(fileName, std::ios::out | std::ios::binary); |
| |
|
| | if (!fileHandler) |
| | { |
| | std::cerr << "Cannot open file : " << fileName << std::endl; |
| | exit(EXIT_FAILURE); |
| | } |
| |
|
| | CUptiUtil_GetHeaderDataParams getHeaderDataParams = {}; |
| | getHeaderDataParams.size = CUptiUtil_GetHeaderDataParamsSize; |
| | getHeaderDataParams.fileHandler = &fileHandler; |
| |
|
| | CUPTI_UTIL_CALL(CuptiUtilGetHeaderData(&getHeaderDataParams)); |
| |
|
| | if (verbose) |
| | { |
| | std::cout << "Total buffers available in file " << fileName << ": " << getHeaderDataParams.headerInfo.totalBuffers << std::endl; |
| | } |
| |
|
| | for(size_t i=0; i<getHeaderDataParams.headerInfo.totalBuffers ; i++) |
| | { |
| | CUptiUtil_GetBufferInfoParams getBufferInfoParams = {}; |
| | getBufferInfoParams.size = CUptiUtil_GetBufferInfoParamsSize; |
| | getBufferInfoParams.fileHandler = &fileHandler; |
| |
|
| | CUPTI_UTIL_CALL(CuptiUtilGetBufferInfo(&getBufferInfoParams)); |
| |
|
| | CUpti_PCSamplingData buffersRereivedData = {0}; |
| | buffersRereivedData.pPcData = (CUpti_PCSamplingPCData *) calloc (getBufferInfoParams.bufferInfoData.recordCount, sizeof(CUpti_PCSamplingPCData)); |
| | MEMORY_ALLOCATION_CALL(buffersRereivedData.pPcData); |
| | for (size_t j=0; j<getBufferInfoParams.bufferInfoData.recordCount; j++) |
| | { |
| | buffersRereivedData.pPcData[j].stallReason = (CUpti_PCSamplingStallReason *)calloc(getBufferInfoParams.bufferInfoData.numSelectedStallReasons, sizeof(CUpti_PCSamplingStallReason)); |
| | } |
| |
|
| | if (i == 0) |
| | { |
| | char **pStallReasonsRetrieve = (char **)calloc(getBufferInfoParams.bufferInfoData.numStallReasons, sizeof(char*)); |
| | MEMORY_ALLOCATION_CALL(pStallReasonsRetrieve); |
| | for (size_t i = 0; i < getBufferInfoParams.bufferInfoData.numStallReasons; i++) |
| | { |
| | pStallReasonsRetrieve[i] = (char *)calloc(CUPTI_STALL_REASON_STRING_SIZE, sizeof(char)); |
| | MEMORY_ALLOCATION_CALL(pStallReasonsRetrieve[i]); |
| | } |
| | uint32_t *pStallReasonIndexRetrieve = (uint32_t *)calloc(getBufferInfoParams.bufferInfoData.numStallReasons, sizeof(uint32_t)); |
| | MEMORY_ALLOCATION_CALL(pStallReasonIndexRetrieve); |
| |
|
| | pcSamplingStallReasonsRetrieve.numStallReasons = getBufferInfoParams.bufferInfoData.numStallReasons; |
| | pcSamplingStallReasonsRetrieve.stallReasonIndex = pStallReasonIndexRetrieve; |
| | pcSamplingStallReasonsRetrieve.stallReasons = pStallReasonsRetrieve; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getSampPeriod = {}; |
| | getSampPeriod.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getStallReason = {}; |
| | uint32_t *pGetStallReasonIndex = (uint32_t *)calloc(getBufferInfoParams.bufferInfoData.numSelectedStallReasons, sizeof(uint32_t)); |
| | MEMORY_ALLOCATION_CALL(pGetStallReasonIndex); |
| | getStallReason.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON; |
| | getStallReason.attributeData.stallReasonData.pStallReasonIndex = pGetStallReasonIndex; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getScratchBufferSize = {}; |
| | getScratchBufferSize.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getHwBufferSize = {}; |
| | getHwBufferSize.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getCollectionMode = {}; |
| | getCollectionMode.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getEnableStartStop = {}; |
| | getEnableStartStop.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL; |
| |
|
| | CUpti_PCSamplingConfigurationInfo getOutputDataFormat = {}; |
| | getOutputDataFormat.attributeType = CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT; |
| |
|
| | std::vector<CUpti_PCSamplingConfigurationInfo> pcSamplingConfigurationInfoRetrieve; |
| | pcSamplingConfigurationInfoRetrieve.push_back(getSampPeriod); |
| | pcSamplingConfigurationInfoRetrieve.push_back(getStallReason); |
| | pcSamplingConfigurationInfoRetrieve.push_back(getScratchBufferSize); |
| | pcSamplingConfigurationInfoRetrieve.push_back(getHwBufferSize); |
| | pcSamplingConfigurationInfoRetrieve.push_back(getCollectionMode); |
| | pcSamplingConfigurationInfoRetrieve.push_back(getEnableStartStop); |
| | pcSamplingConfigurationInfoRetrieve.push_back(getOutputDataFormat); |
| |
|
| | CUptiUtil_GetPcSampDataParams getPcSampDataParams = {}; |
| | getPcSampDataParams.size = CUptiUtil_GetPcSampDataParamsSize; |
| | getPcSampDataParams.fileHandler = &fileHandler; |
| | getPcSampDataParams.bufferType = PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA; |
| | getPcSampDataParams.pBufferInfoData = &getBufferInfoParams.bufferInfoData; |
| | getPcSampDataParams.pSamplingData = (void*)&buffersRereivedData; |
| | getPcSampDataParams.numAttributes = pcSamplingConfigurationInfoRetrieve.size(); |
| | getPcSampDataParams.pPCSamplingConfigurationInfo = pcSamplingConfigurationInfoRetrieve.data(); |
| | getPcSampDataParams.pPcSamplingStallReasons = &pcSamplingStallReasonsRetrieve; |
| |
|
| | CUPTI_UTIL_CALL(CuptiUtilGetPcSampData(&getPcSampDataParams)); |
| |
|
| | for (size_t i=0; i<getPcSampDataParams.numAttributes; i++) |
| | { |
| | if (getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeType == CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE) |
| | { |
| | collectionMode = getPcSampDataParams.pPCSamplingConfigurationInfo[i].attributeData.collectionModeData.collectionMode; |
| | break; |
| | } |
| | } |
| |
|
| | if (verbose) |
| | { |
| | PrintConfigurationDetails(getPcSampDataParams); |
| | } |
| | } |
| | else |
| | { |
| | CUptiUtil_GetPcSampDataParams pGetOnlyPcSampDataParams = {}; |
| | pGetOnlyPcSampDataParams.size = CUptiUtil_GetPcSampDataParamsSize; |
| | pGetOnlyPcSampDataParams.fileHandler = &fileHandler; |
| | pGetOnlyPcSampDataParams.bufferType = PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA; |
| | pGetOnlyPcSampDataParams.pBufferInfoData = &getBufferInfoParams.bufferInfoData; |
| | pGetOnlyPcSampDataParams.pSamplingData = (void*)&buffersRereivedData; |
| | pGetOnlyPcSampDataParams.numAttributes = 0; |
| | pGetOnlyPcSampDataParams.pPCSamplingConfigurationInfo = NULL; |
| | pGetOnlyPcSampDataParams.pPcSamplingStallReasons = NULL; |
| |
|
| | CUPTI_UTIL_CALL(CuptiUtilGetPcSampData(&pGetOnlyPcSampDataParams)); |
| | } |
| |
|
| | buffersRetrievedDataVector.push_back(buffersRereivedData); |
| | } |
| |
|
| | fileHandler.close(); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | static void FillCrcModuleMap() |
| | { |
| | for(int i = 1 ;; i++) |
| | { |
| | ModuleDetails moduleDetailsStruct = {}; |
| | std::string cubinFileName = std::to_string(i) + ".cubin"; |
| |
|
| | std::ifstream fileHandler(cubinFileName, std::ios::binary | std::ios::ate); |
| |
|
| | if (!fileHandler) |
| | { |
| | break; |
| | } |
| |
|
| | moduleDetailsStruct.cubinSize = fileHandler.tellg(); |
| |
|
| | if (!fileHandler.seekg(0, std::ios::beg)) |
| | { |
| | std::cerr << "Unable to find size for cubin file " << cubinFileName << std::endl; |
| | exit(EXIT_FAILURE); |
| | } |
| |
|
| | moduleDetailsStruct.cubinImage = malloc(sizeof(char) * moduleDetailsStruct.cubinSize); |
| | MEMORY_ALLOCATION_CALL(moduleDetailsStruct.cubinImage); |
| |
|
| | fileHandler.read((char*)moduleDetailsStruct.cubinImage, moduleDetailsStruct.cubinSize); |
| |
|
| | fileHandler.close(); |
| |
|
| | if (verbose) |
| | { |
| | std::cout << "Read cubin file " << cubinFileName << std::endl; |
| | } |
| |
|
| | |
| | CUpti_GetCubinCrcParams cubinCrcParams = {0}; |
| | cubinCrcParams.size = CUpti_GetCubinCrcParamsSize; |
| | cubinCrcParams.cubinSize = moduleDetailsStruct.cubinSize; |
| | cubinCrcParams.cubin = moduleDetailsStruct.cubinImage; |
| |
|
| | CUPTI_CALL(cuptiGetCubinCrc(&cubinCrcParams)); |
| |
|
| | uint64_t cubinCrc = cubinCrcParams.cubinCrc; |
| | crcModuleMap.insert(std::make_pair(cubinCrc, moduleDetailsStruct)); |
| | } |
| |
|
| | if (verbose) |
| | { |
| | std::cout << std::endl; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | static void MergePcSampDataBuffers(CUpti_PCSamplingData **mergedPcSampDataBuffer, size_t& numMergedPcSampDataBuffer) |
| | { |
| | CUptiUtil_MergePcSampDataParams mergePcSampDataParams = {}; |
| | mergePcSampDataParams.size = CUptiUtil_MergePcSampDataParamsSize; |
| | mergePcSampDataParams.numberOfBuffers = buffersRetrievedDataVector.size(); |
| | mergePcSampDataParams.PcSampDataBuffer = buffersRetrievedDataVector.data(); |
| | mergePcSampDataParams.MergedPcSampDataBuffers = mergedPcSampDataBuffer; |
| | mergePcSampDataParams.numMergedBuffer = &numMergedPcSampDataBuffer; |
| |
|
| | CUPTI_UTIL_CALL(CuptiUtilMergePcSampData(&mergePcSampDataParams)); |
| |
|
| | if (verbose) |
| | { |
| | std::cout << buffersRetrievedDataVector.size() <<" buffers merged into " << numMergedPcSampDataBuffer << " buffer/s." << std::endl; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | static void SourceCorrelation(CUpti_PCSamplingData *pPcSampDataBuffer, size_t numPcSampDataBuffer) |
| | { |
| | std::map<uint64_t, ModuleDetails>::iterator itr; |
| | size_t numPcNoCubin = 0; |
| | size_t numPcNoLineinfo = 0; |
| |
|
| | for (size_t pcSampBufferIndex = 0; pcSampBufferIndex < numPcSampDataBuffer; pcSampBufferIndex++) |
| | { |
| | std::cout << "========================== PC Records Buffer Info ==========================" << std::endl; |
| | std::cout << "Buffer Number: " << pcSampBufferIndex + 1 |
| | << ", Range Id: " << pPcSampDataBuffer[pcSampBufferIndex].rangeId |
| | << ", Count of PC records: " << pPcSampDataBuffer[pcSampBufferIndex].totalNumPcs |
| | << ", Total Samples: " << pPcSampDataBuffer[pcSampBufferIndex].totalSamples |
| | << ", Total Dropped Samples: " << pPcSampDataBuffer[pcSampBufferIndex].droppedSamples; |
| | if (CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(CUpti_PCSamplingData, nonUsrKernelsTotalSamples, buffersRetrievedDataVector[pcSampBufferIndex].size)) |
| | { |
| | std::cout << ", Non User Kernels Total Samples: " << buffersRetrievedDataVector[pcSampBufferIndex].nonUsrKernelsTotalSamples; |
| | } |
| | std::cout << std::endl; |
| |
|
| |
|
| |
|
| | for(size_t i=0 ; i < pPcSampDataBuffer[pcSampBufferIndex].totalNumPcs; i++) |
| | { |
| | |
| | itr = crcModuleMap.find(pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].cubinCrc); |
| |
|
| | if (itr == crcModuleMap.end()) |
| | { |
| | numPcNoCubin++; |
| |
|
| | if (!disablePcInfoPrints) |
| | { |
| | std::cout << "functionName: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionName |
| | << ", functionIndex: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionIndex |
| | << ", pcOffset: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].pcOffset |
| | << ", lineNumber:0" |
| | << ", fileName: " << "ERROR_NO_CUBIN" |
| | << ", dirName: " |
| | << ", stallReasonCount: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReasonCount; |
| |
|
| | for (size_t k=0; k < pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReasonCount; k++) |
| | { |
| | std::cout << ", " << GetStallReason(pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReason[k].pcSamplingStallReasonIndex) |
| | << ": " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReason[k].samples; |
| | } |
| | std::cout << std::endl; |
| | } |
| |
|
| | continue; |
| | } |
| |
|
| | if (!disablePcInfoPrints) |
| | { |
| | std::cout << "functionName: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionName |
| | << ", functionIndex: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionIndex |
| | << ", pcOffset: " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].pcOffset; |
| | } |
| |
|
| | CUpti_GetSassToSourceCorrelationParams pCSamplingGetSassToSourceCorrelationParams = {0}; |
| | pCSamplingGetSassToSourceCorrelationParams.size = CUpti_GetSassToSourceCorrelationParamsSize; |
| | pCSamplingGetSassToSourceCorrelationParams.functionName = pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].functionName; |
| | pCSamplingGetSassToSourceCorrelationParams.pcOffset = pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].pcOffset; |
| | pCSamplingGetSassToSourceCorrelationParams.cubin = itr->second.cubinImage; |
| | pCSamplingGetSassToSourceCorrelationParams.cubinSize = itr->second.cubinSize; |
| |
|
| | CUptiResult cuptiResult = cuptiGetSassToSourceCorrelation(&pCSamplingGetSassToSourceCorrelationParams); |
| |
|
| | if (!disablePcInfoPrints) |
| | { |
| | if (cuptiResult == CUPTI_SUCCESS) |
| | { |
| | std::cout << ", lineNumber: " << pCSamplingGetSassToSourceCorrelationParams.lineNumber |
| | << ", fileName: " << pCSamplingGetSassToSourceCorrelationParams.fileName |
| | << ", dirName: " << pCSamplingGetSassToSourceCorrelationParams.dirName; |
| |
|
| | free(pCSamplingGetSassToSourceCorrelationParams.fileName); |
| | free(pCSamplingGetSassToSourceCorrelationParams.dirName); |
| | } |
| | else |
| | { |
| | |
| | |
| | numPcNoLineinfo++; |
| | std::cout << ", lineNumber: 0" |
| | << ", fileName: " << "ERROR_NO_LINEINFO" |
| | << ", dirName: "; |
| | } |
| |
|
| | std::cout << ", stallReasonCount: " <<pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReasonCount; |
| |
|
| | for (size_t k=0; k < pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReasonCount; k++) |
| | { |
| | std::cout << ", " << GetStallReason(pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReason[k].pcSamplingStallReasonIndex) |
| | << ": " << pPcSampDataBuffer[pcSampBufferIndex].pPcData[i].stallReason[k].samples; |
| | } |
| | std::cout << std::endl; |
| | } |
| | } |
| | } |
| |
|
| | if (numPcNoCubin) |
| | { |
| | std::cerr << std::endl << "WARNING :: For these many PCs did not find cubin of same CRC: " << numPcNoCubin << std::endl; |
| | } |
| | if (numPcNoLineinfo) |
| | { |
| | std::cerr << std::endl << "WARNING :: These many PCs belongs to cubin which don't have lineinfo: " << numPcNoLineinfo << std::endl; |
| | } |
| | } |
| |
|
| | static void FreePcSampStallReasonsMemory() |
| | { |
| | for (size_t i = 0; i < pcSamplingStallReasonsRetrieve.numStallReasons; i++) |
| | { |
| | free(pcSamplingStallReasonsRetrieve.stallReasons[i]); |
| | } |
| | free(pcSamplingStallReasonsRetrieve.stallReasons); |
| | free(pcSamplingStallReasonsRetrieve.stallReasonIndex); |
| | } |
| |
|
| | static void FreePcSampDataBuffers(CUpti_PCSamplingData *pcSampData, size_t numBuffers) |
| | { |
| | for (size_t i=0; i<numBuffers; i++) |
| | { |
| | for (size_t j=0; j<pcSampData[i].totalNumPcs; j++) |
| | { |
| | free(pcSampData[i].pPcData[j].stallReason); |
| | free(pcSampData[i].pPcData[j].functionName); |
| | } |
| | free(pcSampData[i].pPcData); |
| | } |
| | } |
| |
|
| | static void FreeCrcModuleMapMemory() |
| | { |
| | for (auto itr = crcModuleMap.begin(); itr != crcModuleMap.end(); itr++) |
| | { |
| | free(itr->second.cubinImage); |
| | } |
| | } |
| |
|
| | #endif |