File size: 12,239 Bytes
563c80f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#include <Metric.h>
#include <Parser.h>
#include <Utils.h>
#include <nvperf_host.h>
#include <nvperf_cuda_host.h>
#include <iostream>
#include <ScopeExit.h>

namespace NV {
    namespace Metric {
        namespace Config {

            bool GetRawMetricRequests(std::string chipName,
                                      const std::vector<std::string>& metricNames,
                                      std::vector<NVPA_RawMetricRequest>& rawMetricRequests,
                                      const uint8_t* pCounterAvailabilityImage)
            {
                NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParam = {NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
                calculateScratchBufferSizeParam.pChipName = chipName.c_str();
                calculateScratchBufferSizeParam.pCounterAvailabilityImage = pCounterAvailabilityImage;
                RETURN_IF_NVPW_ERROR(false, NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParam));

                std::vector<uint8_t> scratchBuffer(calculateScratchBufferSizeParam.scratchBufferSize);
                NVPW_CUDA_MetricsEvaluator_Initialize_Params metricEvaluatorInitializeParams = {NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
                metricEvaluatorInitializeParams.scratchBufferSize = scratchBuffer.size();
                metricEvaluatorInitializeParams.pScratchBuffer = scratchBuffer.data();
                metricEvaluatorInitializeParams.pChipName = chipName.c_str();
                metricEvaluatorInitializeParams.pCounterAvailabilityImage = pCounterAvailabilityImage;
                RETURN_IF_NVPW_ERROR(false, NVPW_CUDA_MetricsEvaluator_Initialize(&metricEvaluatorInitializeParams));
                NVPW_MetricsEvaluator* metricEvaluator = metricEvaluatorInitializeParams.pMetricsEvaluator;

                bool isolated = true;
                bool keepInstances = true;
                std::vector<const char*> rawMetricNames;
                for (auto& metricName : metricNames)
                {
                    std::string reqName;
                    NV::Metric::Parser::ParseMetricNameString(metricName, &reqName, &isolated, &keepInstances);
                    keepInstances = true;
                    NVPW_MetricEvalRequest metricEvalRequest;
                    NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params convertMetricToEvalRequest = {NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE};
                    convertMetricToEvalRequest.pMetricsEvaluator = metricEvaluator;
                    convertMetricToEvalRequest.pMetricName = reqName.c_str();
                    convertMetricToEvalRequest.pMetricEvalRequest = &metricEvalRequest;
                    convertMetricToEvalRequest.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
                    RETURN_IF_NVPW_ERROR(false, NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&convertMetricToEvalRequest));

                    std::vector<const char*> rawDependencies;
                    NVPW_MetricsEvaluator_GetMetricRawDependencies_Params getMetricRawDependenciesParms = {NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE};
                    getMetricRawDependenciesParms.pMetricsEvaluator = metricEvaluator;
                    getMetricRawDependenciesParms.pMetricEvalRequests = &metricEvalRequest;
                    getMetricRawDependenciesParms.numMetricEvalRequests = 1;
                    getMetricRawDependenciesParms.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
                    getMetricRawDependenciesParms.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);
                    RETURN_IF_NVPW_ERROR(false, NVPW_MetricsEvaluator_GetMetricRawDependencies(&getMetricRawDependenciesParms));
                    rawDependencies.resize(getMetricRawDependenciesParms.numRawDependencies);
                    getMetricRawDependenciesParms.ppRawDependencies = rawDependencies.data();
                    RETURN_IF_NVPW_ERROR(false, NVPW_MetricsEvaluator_GetMetricRawDependencies(&getMetricRawDependenciesParms));

                    for (size_t i = 0; i < rawDependencies.size(); ++i)
                    {
                        rawMetricNames.push_back(rawDependencies[i]);
                    }
                }

                for (auto& rawMetricName : rawMetricNames)
                {
                    NVPA_RawMetricRequest metricRequest = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE };
                    metricRequest.pMetricName = rawMetricName;
                    metricRequest.isolated = isolated;
                    metricRequest.keepInstances = keepInstances;
                    rawMetricRequests.push_back(metricRequest);
                }

                NVPW_MetricsEvaluator_Destroy_Params metricEvaluatorDestroyParams = { NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE };
                metricEvaluatorDestroyParams.pMetricsEvaluator = metricEvaluator;
                RETURN_IF_NVPW_ERROR(false, NVPW_MetricsEvaluator_Destroy(&metricEvaluatorDestroyParams));
                return true;
            }

            // Builds the raw-metrics config image for `metricNames` on `chipName`.
            //
            // Parameters:
            //   chipName                  - chip name passed to the raw-metrics config creation.
            //   metricNames               - metric names, resolved via GetRawMetricRequests.
            //   configImage               - output; resized to the image size and filled.
            //   pCounterAvailabilityImage - optional; when non-null it is also applied to the
            //                               config through SetCounterAvailability.
            //
            // Returns false when the metric names cannot be resolved or any NVPW call fails.
            bool GetConfigImage(std::string chipName, const std::vector<std::string>& metricNames, std::vector<uint8_t>& configImage, const uint8_t* pCounterAvailabilityImage)
            {
                std::vector<NVPA_RawMetricRequest> rawMetricRequests;
                // Do not build an image from an incomplete request list.
                if (!GetRawMetricRequests(chipName, metricNames, rawMetricRequests, pCounterAvailabilityImage))
                {
                    return false;
                }

                NVPW_CUDA_RawMetricsConfig_Create_V2_Params rawMetricsConfigCreateParams = { NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE };
                rawMetricsConfigCreateParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
                rawMetricsConfigCreateParams.pChipName = chipName.c_str();
                rawMetricsConfigCreateParams.pCounterAvailabilityImage = pCounterAvailabilityImage;
                RETURN_IF_NVPW_ERROR(false, NVPW_CUDA_RawMetricsConfig_Create_V2(&rawMetricsConfigCreateParams));
                NVPA_RawMetricsConfig* pRawMetricsConfig = rawMetricsConfigCreateParams.pRawMetricsConfig;

                // Register cleanup immediately after creation so that every early return
                // below, including a SetCounterAvailability failure, is covered.
                // The destroy params are fully populated before SCOPE_EXIT is declared, and
                // the cast is kept exactly as it was.
                // NOTE(review): confirm in ScopeExit.h how SCOPE_EXIT captures and invokes
                // its argument; the cast may be needed there.
                NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = { NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE };
                rawMetricsConfigDestroyParams.pRawMetricsConfig = pRawMetricsConfig;
                SCOPE_EXIT([&]() { NVPW_RawMetricsConfig_Destroy((NVPW_RawMetricsConfig_Destroy_Params *)&rawMetricsConfigDestroyParams); });

                if(pCounterAvailabilityImage)
                {
                    NVPW_RawMetricsConfig_SetCounterAvailability_Params setCounterAvailabilityParams = {NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE};
                    setCounterAvailabilityParams.pRawMetricsConfig = pRawMetricsConfig;
                    setCounterAvailabilityParams.pCounterAvailabilityImage = pCounterAvailabilityImage;
                    RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_SetCounterAvailability(&setCounterAvailabilityParams));
                }

                // All requests are added inside a single pass group.
                NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = { NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE };
                beginPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
                RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams));

                NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = { NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE };
                addMetricsParams.pRawMetricsConfig = pRawMetricsConfig;
                addMetricsParams.pRawMetricRequests = rawMetricRequests.data();
                addMetricsParams.numMetricRequests = rawMetricRequests.size();
                RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_AddMetrics(&addMetricsParams));

                NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = { NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE };
                endPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
                RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_EndPassGroup(&endPassGroupParams));

                NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParams = { NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE };
                generateConfigImageParams.pRawMetricsConfig = pRawMetricsConfig;
                RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_GenerateConfigImage(&generateConfigImageParams));

                // Two-call pattern: query with no buffer first, then size the output from
                // bytesCopied and call again to fill it.
                NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParams = { NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE };
                getConfigImageParams.pRawMetricsConfig = pRawMetricsConfig;
                getConfigImageParams.bytesAllocated = 0;
                getConfigImageParams.pBuffer = nullptr;
                RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParams));

                configImage.resize(getConfigImageParams.bytesCopied);
                getConfigImageParams.bytesAllocated = configImage.size();
                getConfigImageParams.pBuffer = configImage.data();
                RETURN_IF_NVPW_ERROR(false, NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParams));

                return true;
            }

            // Builds the counter-data prefix image for `metricNames` on `chipName`.
            //
            // Parameters:
            //   chipName                  - chip name passed to the counter-data builder.
            //   metricNames               - metric names, resolved via GetRawMetricRequests.
            //   counterDataImagePrefix    - output; resized to the prefix size and filled.
            //   pCounterAvailabilityImage - forwarded to the request lookup and the builder.
            //
            // Returns false when the metric names cannot be resolved or any NVPW call fails.
            bool GetCounterDataPrefixImage(std::string chipName, const std::vector<std::string>& metricNames, std::vector<uint8_t>& counterDataImagePrefix, const uint8_t* pCounterAvailabilityImage)
            {
                std::vector<NVPA_RawMetricRequest> rawMetricRequests;
                // Do not build a prefix from an incomplete request list.
                if (!GetRawMetricRequests(chipName, metricNames, rawMetricRequests, pCounterAvailabilityImage))
                {
                    return false;
                }

                NVPW_CUDA_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = { NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE };
                counterDataBuilderCreateParams.pChipName = chipName.c_str();
                counterDataBuilderCreateParams.pCounterAvailabilityImage = pCounterAvailabilityImage;
                RETURN_IF_NVPW_ERROR(false, NVPW_CUDA_CounterDataBuilder_Create(&counterDataBuilderCreateParams));

                // Cleanup is registered right after creation; the destroy params are fully
                // populated before SCOPE_EXIT is declared, and the cast is kept as it was.
                // NOTE(review): confirm in ScopeExit.h how SCOPE_EXIT captures and invokes
                // its argument; the cast may be needed there.
                NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderDestroyParams = { NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE };
                counterDataBuilderDestroyParams.pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder;
                SCOPE_EXIT([&]() { NVPW_CounterDataBuilder_Destroy((NVPW_CounterDataBuilder_Destroy_Params *)&counterDataBuilderDestroyParams); });

                NVPW_CounterDataBuilder_AddMetrics_Params addMetricsParams = { NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE };
                addMetricsParams.pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder;
                addMetricsParams.pRawMetricRequests = rawMetricRequests.data();
                addMetricsParams.numMetricRequests = rawMetricRequests.size();
                RETURN_IF_NVPW_ERROR(false, NVPW_CounterDataBuilder_AddMetrics(&addMetricsParams));

                // Two-call pattern: query with no buffer first, then size the output from
                // bytesCopied and call again to fill it.
                NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE };
                getCounterDataPrefixParams.pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder;
                getCounterDataPrefixParams.bytesAllocated = 0;
                getCounterDataPrefixParams.pBuffer = nullptr;
                RETURN_IF_NVPW_ERROR(false, NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams));

                counterDataImagePrefix.resize(getCounterDataPrefixParams.bytesCopied);
                getCounterDataPrefixParams.bytesAllocated = counterDataImagePrefix.size();
                getCounterDataPrefixParams.pBuffer = counterDataImagePrefix.data();
                RETURN_IF_NVPW_ERROR(false, NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams));

                return true;
            }
        
        }
    }
}