|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "cuda.h" |
|
|
#include "cuda_runtime_api.h" |
|
|
#include "cupti_callbacks.h" |
|
|
#include "cupti_profiler_target.h" |
|
|
#include "cupti_driver_cbid.h" |
|
|
#include "cupti_target.h" |
|
|
#include "cupti_activity.h" |
|
|
#include "nvperf_host.h" |
|
|
|
|
|
#include <Eval.h> |
|
|
using ::NV::Metric::Eval::PrintMetricValues; |
|
|
|
|
|
#include <Metric.h> |
|
|
using ::NV::Metric::Config::GetConfigImage; |
|
|
using ::NV::Metric::Config::GetCounterDataPrefixImage; |
|
|
|
|
|
#include <Utils.h> |
|
|
using ::NV::Metric::Utils::GetNVPWResultString; |
|
|
|
|
|
#include <iostream> |
|
|
using ::std::cerr; |
|
|
using ::std::cout; |
|
|
using ::std::endl; |
|
|
|
|
|
#include <mutex> |
|
|
using ::std::mutex; |
|
|
|
|
|
#include <string> |
|
|
using ::std::string; |
|
|
|
|
|
#include <unordered_map> |
|
|
using ::std::unordered_map; |
|
|
|
|
|
#include <unordered_set> |
|
|
using ::std::unordered_set; |
|
|
|
|
|
#include <vector> |
|
|
using ::std::vector; |
|
|
|
|
|
#include <stdlib.h> |
|
|
|
|
|
#include "dlfcn.h" |
|
|
extern "C" |
|
|
{ |
|
|
extern decltype(dlsym) __libc_dlsym; |
|
|
extern decltype(dlopen) __libc_dlopen_mode; |
|
|
} |
|
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
|
#define DLLEXPORT __declspec(dllexport) |
|
|
#define HIDDEN |
|
|
#else |
|
|
#define DLLEXPORT __attribute__((visibility("default"))) |
|
|
#define HIDDEN __attribute__((visibility("hidden"))) |
|
|
#endif |
|
|
|
|
|
#ifndef EXIT_WAIVED |
|
|
#define EXIT_WAIVED 2 |
|
|
#endif |
|
|
|
|
|
|
|
|
#define CUPTI_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
CUptiResult _status = apiFuncCall; \ |
|
|
if (_status != CUPTI_SUCCESS) { \ |
|
|
const char *errstr; \ |
|
|
cuptiGetResultString(_status, &errstr); \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, errstr); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define RUNTIME_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
cudaError_t _status = apiFuncCall; \ |
|
|
if (_status != cudaSuccess) { \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define MEMORY_ALLOCATION_CALL(var) \ |
|
|
do { \ |
|
|
if (var == NULL) { \ |
|
|
fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n", \ |
|
|
__FILE__, __LINE__); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define DRIVER_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
CUresult _status = apiFuncCall; \ |
|
|
if (_status != CUDA_SUCCESS) { \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, _status); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
#define NVPW_API_CALL(apiFuncCall) \ |
|
|
do { \ |
|
|
NVPA_Status _status = apiFuncCall; \ |
|
|
if (_status != NVPA_STATUS_SUCCESS) { \ |
|
|
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ |
|
|
__FILE__, __LINE__, #apiFuncCall, GetNVPWResultString(_status)); \ |
|
|
exit(EXIT_FAILURE); \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
|
|
|
struct ctxProfilerData |
|
|
{ |
|
|
CUcontext ctx; |
|
|
int dev_id; |
|
|
cudaDeviceProp dev_prop; |
|
|
vector<uint8_t> counterAvailabilityImage; |
|
|
CUpti_Profiler_CounterDataImageOptions counterDataImageOptions; |
|
|
vector<uint8_t> counterDataImage; |
|
|
vector<uint8_t> counterDataPrefixImage; |
|
|
vector<uint8_t> counterDataScratchBufferImage; |
|
|
vector<uint8_t> configImage; |
|
|
int maxNumRanges; |
|
|
int curRanges; |
|
|
int maxRangeNameLength; |
|
|
int iterations; |
|
|
|
|
|
|
|
|
ctxProfilerData() : curRanges(), maxRangeNameLength(64), iterations() |
|
|
{ |
|
|
char * env_var = getenv("INJECTION_KERNEL_COUNT"); |
|
|
if (env_var != NULL) |
|
|
{ |
|
|
int val = atoi(env_var); |
|
|
if (val < 1) |
|
|
{ |
|
|
cerr << "Read " << val << " kernels from INJECTION_KERNEL_COUNT, but must be >= 1; defaulting to 10." << endl; |
|
|
val = 10; |
|
|
} |
|
|
maxNumRanges = val; |
|
|
} |
|
|
else |
|
|
{ |
|
|
maxNumRanges = 10; |
|
|
} |
|
|
}; |
|
|
}; |
|
|
|
|
|
|
|
|
mutex ctx_data_mutex; |
|
|
unordered_map<CUcontext, ctxProfilerData> ctx_data; |
|
|
|
|
|
|
|
|
vector<string> metricNames; |
|
|
|
|
|
|
|
|
void initialize_state() |
|
|
{ |
|
|
static int profiler_initialized = 0; |
|
|
|
|
|
if (profiler_initialized == 0) |
|
|
{ |
|
|
|
|
|
CUpti_Profiler_Initialize_Params profilerInitializeParams = { CUpti_Profiler_Initialize_Params_STRUCT_SIZE }; |
|
|
CUPTI_API_CALL(cuptiProfilerInitialize(&profilerInitializeParams)); |
|
|
|
|
|
|
|
|
NVPW_InitializeHost_Params initializeHostParams = { NVPW_InitializeHost_Params_STRUCT_SIZE }; |
|
|
NVPW_API_CALL(NVPW_InitializeHost(&initializeHostParams)); |
|
|
|
|
|
profiler_initialized = 1; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
void initialize_ctx_data(ctxProfilerData &ctx_data) |
|
|
{ |
|
|
initialize_state(); |
|
|
|
|
|
|
|
|
CUpti_Profiler_GetCounterAvailability_Params getCounterAvailabilityParams = { CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE }; |
|
|
getCounterAvailabilityParams.ctx = ctx_data.ctx; |
|
|
CUPTI_API_CALL(cuptiProfilerGetCounterAvailability(&getCounterAvailabilityParams)); |
|
|
|
|
|
|
|
|
ctx_data.counterAvailabilityImage.resize(getCounterAvailabilityParams.counterAvailabilityImageSize); |
|
|
|
|
|
|
|
|
getCounterAvailabilityParams.pCounterAvailabilityImage = ctx_data.counterAvailabilityImage.data(); |
|
|
CUPTI_API_CALL(cuptiProfilerGetCounterAvailability(&getCounterAvailabilityParams)); |
|
|
|
|
|
|
|
|
if (!GetConfigImage(ctx_data.dev_prop.name, metricNames, ctx_data.configImage, ctx_data.counterAvailabilityImage.data())) |
|
|
{ |
|
|
cerr << "Failed to create configImage for context " << ctx_data.ctx << endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
|
|
|
if (!GetCounterDataPrefixImage(ctx_data.dev_prop.name, metricNames, ctx_data.counterDataPrefixImage, ctx_data.counterAvailabilityImage.data())) |
|
|
{ |
|
|
cerr << "Failed to create counterDataPrefixImage for context " << ctx_data.ctx << endl; |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
|
|
|
ctx_data.counterDataImageOptions.pCounterDataPrefix = ctx_data.counterDataPrefixImage.data(); |
|
|
ctx_data.counterDataImageOptions.counterDataPrefixSize = ctx_data.counterDataPrefixImage.size(); |
|
|
ctx_data.counterDataImageOptions.maxNumRanges = ctx_data.maxNumRanges; |
|
|
ctx_data.counterDataImageOptions.maxNumRangeTreeNodes = ctx_data.maxNumRanges; |
|
|
ctx_data.counterDataImageOptions.maxRangeNameLength = ctx_data.maxRangeNameLength; |
|
|
|
|
|
|
|
|
CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE }; |
|
|
calculateSizeParams.pOptions = &(ctx_data.counterDataImageOptions); |
|
|
calculateSizeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageCalculateSize(&calculateSizeParams)); |
|
|
|
|
|
ctx_data.counterDataImage.resize(calculateSizeParams.counterDataImageSize); |
|
|
|
|
|
|
|
|
CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams = { CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE }; |
|
|
initializeParams.pOptions = &(ctx_data.counterDataImageOptions); |
|
|
initializeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; |
|
|
initializeParams.counterDataImageSize = ctx_data.counterDataImage.size(); |
|
|
initializeParams.pCounterDataImage = ctx_data.counterDataImage.data(); |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageInitialize(&initializeParams)); |
|
|
|
|
|
|
|
|
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE }; |
|
|
scratchBufferSizeParams.counterDataImageSize = ctx_data.counterDataImage.size(); |
|
|
scratchBufferSizeParams.pCounterDataImage = ctx_data.counterDataImage.data(); |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageCalculateScratchBufferSize(&scratchBufferSizeParams)); |
|
|
|
|
|
ctx_data.counterDataScratchBufferImage.resize(scratchBufferSizeParams.counterDataScratchBufferSize); |
|
|
|
|
|
|
|
|
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE }; |
|
|
initScratchBufferParams.counterDataImageSize = ctx_data.counterDataImage.size(); |
|
|
initScratchBufferParams.pCounterDataImage = ctx_data.counterDataImage.data(); |
|
|
initScratchBufferParams.counterDataScratchBufferSize = ctx_data.counterDataScratchBufferImage.size();; |
|
|
initScratchBufferParams.pCounterDataScratchBuffer = ctx_data.counterDataScratchBufferImage.data(); |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageInitializeScratchBuffer(&initScratchBufferParams)); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
void start_session(ctxProfilerData &ctx_data) |
|
|
{ |
|
|
CUpti_Profiler_BeginSession_Params beginSessionParams = { CUpti_Profiler_BeginSession_Params_STRUCT_SIZE }; |
|
|
beginSessionParams.counterDataImageSize = ctx_data.counterDataImage.size(); |
|
|
beginSessionParams.pCounterDataImage = ctx_data.counterDataImage.data(); |
|
|
beginSessionParams.counterDataScratchBufferSize = ctx_data.counterDataScratchBufferImage.size(); |
|
|
beginSessionParams.pCounterDataScratchBuffer = ctx_data.counterDataScratchBufferImage.data(); |
|
|
beginSessionParams.ctx = ctx_data.ctx; |
|
|
beginSessionParams.maxLaunchesPerPass = ctx_data.maxNumRanges; |
|
|
beginSessionParams.maxRangesPerPass = ctx_data.maxNumRanges; |
|
|
beginSessionParams.pPriv = NULL; |
|
|
beginSessionParams.range = CUPTI_AutoRange; |
|
|
beginSessionParams.replayMode = CUPTI_KernelReplay; |
|
|
CUPTI_API_CALL(cuptiProfilerBeginSession(&beginSessionParams)); |
|
|
|
|
|
CUpti_Profiler_SetConfig_Params setConfigParams = { CUpti_Profiler_SetConfig_Params_STRUCT_SIZE }; |
|
|
setConfigParams.pConfig = ctx_data.configImage.data(); |
|
|
setConfigParams.configSize = ctx_data.configImage.size(); |
|
|
setConfigParams.passIndex = 0; |
|
|
setConfigParams.minNestingLevel = 1; |
|
|
setConfigParams.numNestingLevels = 1; |
|
|
setConfigParams.targetNestingLevel = 1; |
|
|
CUPTI_API_CALL(cuptiProfilerSetConfig(&setConfigParams)); |
|
|
|
|
|
CUpti_Profiler_EnableProfiling_Params enableProfilingParams = { CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE }; |
|
|
enableProfilingParams.ctx = ctx_data.ctx; |
|
|
CUPTI_API_CALL(cuptiProfilerEnableProfiling(&enableProfilingParams)); |
|
|
|
|
|
ctx_data.iterations++; |
|
|
} |
|
|
|
|
|
|
|
|
static void print_data(ctxProfilerData &ctx_data) |
|
|
{ |
|
|
cout << endl << "Context " << ctx_data.ctx << ", device " << ctx_data.dev_id << " (" << ctx_data.dev_prop.name << ") session " << ctx_data.iterations << ":" << endl; |
|
|
PrintMetricValues(ctx_data.dev_prop.name, ctx_data.counterDataImage, metricNames, ctx_data.counterAvailabilityImage.data()); |
|
|
} |
|
|
|
|
|
|
|
|
void end_session(ctxProfilerData &ctx_data) |
|
|
{ |
|
|
CUpti_Profiler_DisableProfiling_Params disableProfilingParams = { CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE }; |
|
|
disableProfilingParams.ctx = ctx_data.ctx; |
|
|
CUPTI_API_CALL(cuptiProfilerDisableProfiling(&disableProfilingParams)); |
|
|
|
|
|
CUpti_Profiler_UnsetConfig_Params unsetConfigParams = { CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE }; |
|
|
unsetConfigParams.ctx = ctx_data.ctx; |
|
|
CUPTI_API_CALL(cuptiProfilerUnsetConfig(&unsetConfigParams)); |
|
|
|
|
|
CUpti_Profiler_EndSession_Params endSessionParams = { CUpti_Profiler_EndSession_Params_STRUCT_SIZE }; |
|
|
endSessionParams.ctx = ctx_data.ctx; |
|
|
CUPTI_API_CALL(cuptiProfilerEndSession(&endSessionParams)); |
|
|
|
|
|
print_data(ctx_data); |
|
|
|
|
|
|
|
|
CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams = { CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE }; |
|
|
initializeParams.pOptions = &(ctx_data.counterDataImageOptions); |
|
|
initializeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; |
|
|
initializeParams.counterDataImageSize = ctx_data.counterDataImage.size(); |
|
|
initializeParams.pCounterDataImage = ctx_data.counterDataImage.data(); |
|
|
CUPTI_API_CALL(cuptiProfilerCounterDataImageInitialize(&initializeParams)); |
|
|
} |
|
|
|
|
|
|
|
|
static void end_execution() |
|
|
{ |
|
|
CUPTI_API_CALL(cuptiGetLastError()); |
|
|
ctx_data_mutex.lock(); |
|
|
|
|
|
for (auto itr = ctx_data.begin(); itr != ctx_data.end(); ++itr) |
|
|
{ |
|
|
ctxProfilerData &data = itr->second; |
|
|
|
|
|
if (data.curRanges > 0) |
|
|
{ |
|
|
print_data(data); |
|
|
data.curRanges = 0; |
|
|
} |
|
|
} |
|
|
|
|
|
ctx_data_mutex.unlock(); |
|
|
} |
|
|
|
|
|
|
|
|
void callback(void * userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, void const * cbdata) |
|
|
{ |
|
|
static int initialized = 0; |
|
|
|
|
|
CUptiResult res; |
|
|
if (domain == CUPTI_CB_DOMAIN_DRIVER_API) |
|
|
{ |
|
|
|
|
|
if (cbid == CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel) |
|
|
{ |
|
|
CUpti_CallbackData const * data = static_cast<CUpti_CallbackData const *>(cbdata); |
|
|
CUcontext ctx = data->context; |
|
|
|
|
|
|
|
|
if (data->callbackSite == CUPTI_API_ENTER) |
|
|
{ |
|
|
|
|
|
|
|
|
ctx_data_mutex.lock(); |
|
|
if (ctx_data.count(ctx) > 0) |
|
|
{ |
|
|
|
|
|
if (ctx_data[ctx].curRanges == ctx_data[ctx].maxNumRanges) |
|
|
{ |
|
|
end_session(ctx_data[ctx]); |
|
|
ctx_data[ctx].curRanges = 0; |
|
|
} |
|
|
|
|
|
|
|
|
if (ctx_data[ctx].curRanges == 0) |
|
|
{ |
|
|
initialize_ctx_data(ctx_data[ctx]); |
|
|
start_session(ctx_data[ctx]); |
|
|
} |
|
|
|
|
|
|
|
|
ctx_data[ctx].curRanges++; |
|
|
} |
|
|
ctx_data_mutex.unlock(); |
|
|
} |
|
|
} |
|
|
} |
|
|
else if (domain == CUPTI_CB_DOMAIN_RESOURCE) |
|
|
{ |
|
|
|
|
|
if (cbid == CUPTI_CBID_RESOURCE_CONTEXT_CREATED) |
|
|
{ |
|
|
CUpti_ResourceData const * res_data = static_cast<CUpti_ResourceData const *>(cbdata); |
|
|
CUcontext ctx = res_data->context; |
|
|
|
|
|
|
|
|
ctxProfilerData data = { }; |
|
|
|
|
|
data.ctx = ctx; |
|
|
|
|
|
RUNTIME_API_CALL(cudaGetDevice(&(data.dev_id))); |
|
|
|
|
|
|
|
|
initialize_state(); |
|
|
CUpti_Profiler_DeviceSupported_Params params = { CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE }; |
|
|
params.cuDevice = data.dev_id; |
|
|
CUPTI_API_CALL(cuptiProfilerDeviceSupported(¶ms)); |
|
|
|
|
|
|
|
|
ctx_data_mutex.lock(); |
|
|
if (params.isSupported == CUPTI_PROFILER_CONFIGURATION_SUPPORTED) |
|
|
{ |
|
|
|
|
|
ctx_data[ctx] = data; |
|
|
initialize_ctx_data(ctx_data[ctx]); |
|
|
} |
|
|
else |
|
|
{ |
|
|
if (ctx_data.count(ctx)) |
|
|
{ |
|
|
|
|
|
ctx_data.erase(ctx); |
|
|
} |
|
|
|
|
|
cerr << "libinjection_2: Unable to profile context on device " << data.dev_id << endl; |
|
|
|
|
|
if (params.architecture == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
cerr << "\tdevice architecture is not supported" << endl; |
|
|
} |
|
|
|
|
|
if (params.sli == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
cerr << "\tdevice sli configuration is not supported" << endl; |
|
|
} |
|
|
|
|
|
if (params.vGpu == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
cerr << "\tdevice vgpu configuration is not supported" << endl; |
|
|
} |
|
|
else if (params.vGpu == CUPTI_PROFILER_CONFIGURATION_DISABLED) |
|
|
{ |
|
|
cerr << "\tdevice vgpu configuration disabled profiling support" << endl; |
|
|
} |
|
|
|
|
|
if (params.confidentialCompute == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
cerr << "\tdevice confidential compute configuration is not supported" << endl; |
|
|
} |
|
|
|
|
|
if (params.cmp == CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED) |
|
|
{ |
|
|
::std::cerr << "\tNVIDIA Crypto Mining Processors (CMP) are not supported" << ::std::endl; |
|
|
} |
|
|
} |
|
|
ctx_data_mutex.unlock(); |
|
|
} |
|
|
} |
|
|
|
|
|
return; |
|
|
} |
|
|
|
|
|
|
|
|
void register_callbacks() |
|
|
{ |
|
|
|
|
|
CUpti_SubscriberHandle subscriber; |
|
|
CUPTI_API_CALL(cuptiSubscribe(&subscriber, (CUpti_CallbackFunc)callback, NULL)); |
|
|
|
|
|
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_DRIVER_API, CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel)); |
|
|
|
|
|
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE, CUPTI_CBID_RESOURCE_CONTEXT_CREATED)); |
|
|
|
|
|
|
|
|
atexit(end_execution); |
|
|
} |
|
|
|
|
|
static bool injectionInitialized = false; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
extern "C" DLLEXPORT int InitializeInjection() |
|
|
{ |
|
|
if (injectionInitialized == false) |
|
|
{ |
|
|
injectionInitialized = true; |
|
|
|
|
|
|
|
|
char * metrics_env = getenv("INJECTION_METRICS"); |
|
|
if (metrics_env != NULL) |
|
|
{ |
|
|
char * tok = strtok(metrics_env, " ;,"); |
|
|
do |
|
|
{ |
|
|
cout << "Read " << tok << endl; |
|
|
metricNames.push_back(string(tok)); |
|
|
tok = strtok(NULL, " ;,"); |
|
|
} while (tok != NULL); |
|
|
} |
|
|
else |
|
|
{ |
|
|
metricNames.push_back("sm__cycles_elapsed.avg"); |
|
|
metricNames.push_back("smsp__sass_thread_inst_executed_op_dadd_pred_on.avg"); |
|
|
metricNames.push_back("smsp__sass_thread_inst_executed_op_dfma_pred_on.avg"); |
|
|
} |
|
|
|
|
|
|
|
|
register_callbacks(); |
|
|
} |
|
|
return 1; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
extern "C" DLLEXPORT void * dlsym(void * handle, char const * symbol) |
|
|
{ |
|
|
InitializeInjection(); |
|
|
|
|
|
typedef void * (*dlsym_fn)(void *, char const *); |
|
|
static dlsym_fn real_dlsym = NULL; |
|
|
if (real_dlsym == NULL) |
|
|
{ |
|
|
|
|
|
real_dlsym = (dlsym_fn)(__libc_dlsym(__libc_dlopen_mode("libdl.so", RTLD_LAZY), "dlsym")); |
|
|
} |
|
|
if (real_dlsym == NULL) |
|
|
{ |
|
|
cerr << "Error finding real dlsym symbol" << endl; |
|
|
return NULL; |
|
|
} |
|
|
return real_dlsym(handle, symbol); |
|
|
} |
|
|
|