/*
* Copyright 2021 NVIDIA Corporation. All rights reserved
*
* Sample CUPTI app to output NVTX ranges.
* The sample adds NVTX ranges around a simple vector addition app
* NVTX functionality shown in the sample:
* Subscribe to NVTX callbacks and get NVTX records
* Create domain, add start/end and push/pop ranges w.r.t the domain
* Register string against a domain
* Naming of CUDA resources
*
* Before running the sample set the NVTX_INJECTION64_PATH
* environment variable pointing to the CUPTI Library.
* For Linux:
* export NVTX_INJECTION64_PATH=<full_path>/libcupti.so
* For Windows:
* set NVTX_INJECTION64_PATH=<full_path>/cupti.dll
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <cuda.h>
#include "cupti.h"
// Standard NVTX headers
#include "nvtx3/nvToolsExt.h"
#include "nvtx3/nvToolsExtCuda.h"
#include "nvtx3/nvToolsExtCudaRt.h"
// Includes definition of the callback structures to use for NVTX with CUPTI
#include "generated_nvtx_meta.h"
// Evaluate a CUPTI call (or an existing CUptiResult value) exactly once.
// If the result is not CUPTI_SUCCESS, print the source location, the
// stringified expression and the CUPTI error text to stderr, then terminate
// the process with EXIT_FAILURE.
// The error text falls back to "<unknown>" when cuptiGetResultString itself
// fails, so fprintf never receives an indeterminate pointer for %s.
#define CUPTI_CALL(call)                                                      \
  do {                                                                        \
    CUptiResult _status = (call);                                             \
    if (_status != CUPTI_SUCCESS) {                                           \
      const char *errstr = NULL;                                              \
      if (cuptiGetResultString(_status, &errstr) != CUPTI_SUCCESS ||          \
          errstr == NULL) {                                                   \
        errstr = "<unknown>";                                                 \
      }                                                                       \
      fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n",    \
              __FILE__, __LINE__, #call, errstr);                             \
      exit(EXIT_FAILURE);                                                     \
    }                                                                         \
  } while (0)
// Usable size, in bytes, of every activity buffer handed out by this file.
#define BUF_SIZE (32 * 1024)

// Alignment, in bytes, applied to activity buffers. ALIGN_BUFFER masks with
// (align - 1), so this value has to be a power of two.
#define ALIGN_SIZE (8)

// Yield `buffer` unchanged when its address is already a multiple of `align`;
// otherwise yield `buffer` advanced to the next multiple of `align`.
// `buffer` and `align` are each expanded more than once, so pass only
// side-effect-free expressions.
#define ALIGN_BUFFER(buffer, align)                                           \
  (((uintptr_t) (buffer) & ((align)-1))                                       \
     ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1)))            \
     : (buffer))
// Return `name` itself when it is non-NULL, otherwise the literal "<null>",
// so the result can always be passed to a %s conversion.
const char * getName(const char *name) {
  return (name != NULL) ? name : "<null>";
}
// Return `name` itself when it is non-NULL, otherwise the literal
// "<default domain>", so the result can always be passed to a %s conversion.
const char * getDomainName(const char *name) {
  return (name != NULL) ? name : "<default domain>";
}
// Map an activity object kind to a printable upper-case label.
// Kinds other than PROCESS, THREAD, DEVICE, CONTEXT and STREAM map to
// "<unknown>".
const char * getActivityObjectKindString(CUpti_ActivityObjectKind kind) {
  const char *label = "<unknown>";

  switch (kind) {
    case CUPTI_ACTIVITY_OBJECT_PROCESS: label = "PROCESS"; break;
    case CUPTI_ACTIVITY_OBJECT_THREAD:  label = "THREAD";  break;
    case CUPTI_ACTIVITY_OBJECT_DEVICE:  label = "DEVICE";  break;
    case CUPTI_ACTIVITY_OBJECT_CONTEXT: label = "CONTEXT"; break;
    case CUPTI_ACTIVITY_OBJECT_STREAM:  label = "STREAM";  break;
    default:
      // Keep the "<unknown>" fallback.
      break;
  }

  return label;
}
// Pick the identifier out of `id` that corresponds to `kind`:
//   PROCESS / THREAD          -> id->pt.processId / id->pt.threadId
//   DEVICE / CONTEXT / STREAM -> id->dcs.deviceId / contextId / streamId
// Any other kind yields 0xffffffff. `id` is dereferenced without a NULL
// check, so it must point to a valid object.
uint32_t getActivityObjectKindId(CUpti_ActivityObjectKind kind, CUpti_ActivityObjectKindId *id) {
  uint32_t value = 0xffffffff;

  switch (kind) {
    case CUPTI_ACTIVITY_OBJECT_PROCESS: value = id->pt.processId;  break;
    case CUPTI_ACTIVITY_OBJECT_THREAD:  value = id->pt.threadId;   break;
    case CUPTI_ACTIVITY_OBJECT_DEVICE:  value = id->dcs.deviceId;  break;
    case CUPTI_ACTIVITY_OBJECT_CONTEXT: value = id->dcs.contextId; break;
    case CUPTI_ACTIVITY_OBJECT_STREAM:  value = id->dcs.streamId;  break;
    default:
      // Keep the 0xffffffff sentinel.
      break;
  }

  return value;
}
// Buffer-request callback registered with cuptiActivityRegisterCallbacks.
// Allocates BUF_SIZE bytes plus ALIGN_SIZE bytes of slack and reports:
//   *buffer        - the allocation, rounded up to an ALIGN_SIZE boundary
//   *size          - BUF_SIZE
//   *maxNumRecords - 0 (NOTE(review): per the CUPTI documentation this means
//                    "fill the buffer with as many records as fit" - confirm)
// Prints "Error: out of memory" and exits the process if malloc fails.
static void CUPTIAPI
bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) {
  // The explicit cast is kept so the code also builds if this file is
  // compiled as C++ (presumably the case, as the sample wraps a CUDA app).
  uint8_t *raw = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE);

  // Check the pointer malloc returned, before any alignment arithmetic is
  // applied to it.
  if (raw == NULL) {
    printf("Error: out of memory\n");
    exit(EXIT_FAILURE);
  }

  *size = BUF_SIZE;
  *buffer = ALIGN_BUFFER(raw, ALIGN_SIZE);
  *maxNumRecords = 0;
}
// Buffer-completed callback registered with cuptiActivityRegisterCallbacks.
// Walks every record in the first `validSize` bytes of `buffer`, printing
// MARKER and NAME records and ignoring all other kinds, then reports how many
// records were dropped for (ctx, streamId) and releases the buffer.
//   ctx, streamId - forwarded to cuptiActivityGetNumDroppedRecords
//   buffer        - buffer previously supplied by bufferRequested
//   size          - total buffer size (unused here)
//   validSize     - number of bytes in `buffer` holding records
// Any unexpected CUPTI error terminates the process through CUPTI_CALL.
static void CUPTIAPI
bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) {
  CUptiResult status;
  CUpti_Activity *record = NULL;

  (void)size;

  if (validSize > 0) {
    for (;;) {
      status = cuptiActivityGetNextRecord(buffer, validSize, &record);
      if (status == CUPTI_SUCCESS) {
        switch (record->kind) {
          case CUPTI_ACTIVITY_KIND_MARKER:
          {
            CUpti_ActivityMarker2 *marker = (CUpti_ActivityMarker2 *) record;
            printf("MARKER id %u [ %llu ], name %s, domain %s\n",
                   marker->id, (unsigned long long) marker->timestamp,
                   getName(marker->name), getDomainName(marker->domain));
            break;
          }
          case CUPTI_ACTIVITY_KIND_NAME:
          {
            CUpti_ActivityName *name = (CUpti_ActivityName *) record;
            switch (name->objectKind)
            {
              case CUPTI_ACTIVITY_OBJECT_CONTEXT:
                // A context is reported together with its device.
                printf("NAME %s id %u %s id %u, name %s\n",
                       getActivityObjectKindString(name->objectKind),
                       getActivityObjectKindId(name->objectKind, &name->objectId),
                       getActivityObjectKindString(CUPTI_ACTIVITY_OBJECT_DEVICE),
                       getActivityObjectKindId(CUPTI_ACTIVITY_OBJECT_DEVICE, &name->objectId),
                       getName(name->name));
                break;
              case CUPTI_ACTIVITY_OBJECT_STREAM:
                // A stream is reported together with its context and device.
                printf("NAME %s id %u %s id %u %s id %u, name %s\n",
                       getActivityObjectKindString(name->objectKind),
                       getActivityObjectKindId(name->objectKind, &name->objectId),
                       getActivityObjectKindString(CUPTI_ACTIVITY_OBJECT_CONTEXT),
                       getActivityObjectKindId(CUPTI_ACTIVITY_OBJECT_CONTEXT, &name->objectId),
                       getActivityObjectKindString(CUPTI_ACTIVITY_OBJECT_DEVICE),
                       getActivityObjectKindId(CUPTI_ACTIVITY_OBJECT_DEVICE, &name->objectId),
                       getName(name->name));
                break;
              default:
                printf("NAME %s id %u, name %s\n",
                       getActivityObjectKindString(name->objectKind),
                       getActivityObjectKindId(name->objectKind, &name->objectId),
                       getName(name->name));
                break;
            }
            // Explicit break: do not rely on falling through into `default`.
            break;
          }
          default:
            break;
        }
      }
      else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
        // No more records in this buffer.
        break;
      }
      else {
        // Any other status is an error; CUPTI_CALL reports it and exits.
        CUPTI_CALL(status);
      }
    }

    // Report any records dropped from the queue
    size_t dropped = 0;
    CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
    if (dropped != 0) {
      printf("Dropped %u activity records\n", (unsigned int) dropped);
    }
  }

  // Release the buffer; without this every buffer obtained in bufferRequested
  // leaks. NOTE(review): bufferRequested hands out ALIGN_BUFFER(malloc(...)),
  // which equals the malloc pointer only when malloc already returns memory
  // aligned to ALIGN_SIZE (8) - true for the usual 64-bit allocators, confirm
  // for any other target.
  free(buffer);
}
// Callback invoked for NVTX API calls once the CUPTI_CB_DOMAIN_NVTX domain is
// enabled (see initTrace). This sample only demonstrates how to reach the
// arguments and return value of a few NVTX functions; it does not act on them.
//   userdata - pointer given at subscription time (unused)
//   domain   - callback domain (unused)
//   cbid     - identifies which NVTX function was called
//   cbdata   - points to a CUpti_NvtxData describing the call
static void CUPTIAPI
nvtxCallback(void *userdata, CUpti_CallbackDomain domain,
             CUpti_CallbackId cbid, const void *cbdata)
{
  // Keep the const qualifier of cbdata; the record is only read here.
  const CUpti_NvtxData *data = (const CUpti_NvtxData *)cbdata;

  (void)userdata;
  (void)domain;

  switch (cbid) {
    case CUPTI_CBID_NVTX_nvtxDomainCreateA: {
      // Get the parameters passed to the NVTX function
      nvtxDomainCreateA_params *params = (nvtxDomainCreateA_params *)data->functionParams;
      // Get the return value of the NVTX function
      nvtxDomainHandle_t *domainHandle = (nvtxDomainHandle_t *)data->functionReturnValue;
      // Illustrative only: mark as used to avoid unused-variable warnings.
      (void)params;
      (void)domainHandle;
      break;
    }
    case CUPTI_CBID_NVTX_nvtxMarkEx: {
      nvtxMarkEx_params *params = (nvtxMarkEx_params *)data->functionParams;
      (void)params;
      break;
    }
    case CUPTI_CBID_NVTX_nvtxDomainMarkEx: {
      nvtxDomainMarkEx_params *params = (nvtxDomainMarkEx_params *)data->functionParams;
      (void)params;
      break;
    }
    // Add more NVTX callbacks here; refer to "generated_nvtx_meta.h" for all
    // NVTX callback ids and parameter structures.
    // If the NVTX function has no return value, functionReturnValue is NULL.
    default:
      break;
  }
}
// Set up CUPTI for the NVTX sample: subscribe nvtxCallback to the NVTX
// callback domain, register the activity buffer callbacks, and enable the
// MARKER and NAME activity kinds. Any failing CUPTI call terminates the
// process through CUPTI_CALL.
void initTrace() {
  CUpti_SubscriberHandle nvtxSubscriber;

  // Route NVTX API callbacks to nvtxCallback; no user data is attached.
  CUPTI_CALL(cuptiSubscribe(&nvtxSubscriber, (CUpti_CallbackFunc)nvtxCallback, NULL));
  CUPTI_CALL(cuptiEnableDomain(1, nvtxSubscriber, CUPTI_CB_DOMAIN_NVTX));

  // Activity records are delivered through these two buffer callbacks.
  CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));

  // NVTX markers: Marker, Domain, Start/End ranges, Push/Pop ranges,
  // Registered Strings.
  CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER));

  // Names given to CUDA resources: Threads, Devices, Contexts, Streams.
  CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NAME));
}
// Flush all outstanding activity buffers so that their records reach
// bufferCompleted before the application terminates. A failing flush
// terminates the process through CUPTI_CALL.
void finiTrace() {
  // The argument 1 requests a forced flush of any remaining activity buffers.
  CUPTI_CALL(cuptiActivityFlushAll(1));
}