/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H

#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "sampleOptions.h"

namespace sample
{

class BindingsStd;

//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime
{
    InferenceTime(float q, float i, float c, float o)
        : enq(q)
        , h2d(i)
        , compute(c)
        , d2h(o)
    {
    }

    InferenceTime() = default;
    InferenceTime(InferenceTime const&) = default;
    InferenceTime(InferenceTime&&) = default;
    InferenceTime& operator=(InferenceTime const&) = default;
    InferenceTime& operator=(InferenceTime&&) = default;
    ~InferenceTime() = default;

    float enq{0};     // Enqueue
    float h2d{0};     // Host to Device
    float compute{0}; // Compute
    float d2h{0};     // Device to Host

    // Ideal latency: H2D + compute + D2H, excluding host-side enqueue time
    float latency() const
    {
        return h2d + compute + d2h;
    }
};

//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace
{
    InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe)
        : stream(s)
        , enqStart(es)
        , enqEnd(ee)
        , h2dStart(is)
        , h2dEnd(ie)
        , computeStart(cs)
        , computeEnd(ce)
        , d2hStart(os)
        , d2hEnd(oe)
    {
    }

    InferenceTrace() = default;
    InferenceTrace(InferenceTrace const&) = default;
    InferenceTrace(InferenceTrace&&) = default;
    InferenceTrace& operator=(InferenceTrace const&) = default;
    InferenceTrace& operator=(InferenceTrace&&) = default;
    ~InferenceTrace() = default;

    int32_t stream{0};
    float enqStart{0};
    float enqEnd{0};
    float h2dStart{0};
    float h2dEnd{0};
    float computeStart{0};
    float computeEnd{0};
    float d2hStart{0};
    float d2hEnd{0};
};
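
//!
//! Example (illustrative sketch): converting the measurement points of an
//! InferenceTrace into per-phase durations. The helper below is not declared
//! in this header; it only shows how the two structs relate.
//!
//! \code
//! inline InferenceTime traceToTiming(InferenceTrace const& t)
//! {
//!     return InferenceTime(
//!         t.enqEnd - t.enqStart, t.h2dEnd - t.h2dStart, t.computeEnd - t.computeStart, t.d2hEnd - t.d2hStart);
//! }
//! \endcode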

inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b)
{
    return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h);
}

inline InferenceTime& operator+=(InferenceTime& a, InferenceTime const& b)
{
    a = a + b;
    return a;
}
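
//!
//! Example (illustrative sketch): summing per-iteration measurements with
//! operator+= and deriving an average ideal latency. The helper below and its
//! `timings` parameter are assumptions for illustration only.
//!
//! \code
//! inline void printAvgLatency(std::vector<InferenceTime> const& timings)
//! {
//!     InferenceTime total;
//!     for (InferenceTime const& t : timings)
//!     {
//!         total += t;
//!     }
//!     float const avgLatencyMs
//!         = timings.empty() ? 0.F : total.latency() / static_cast<float>(timings.size());
//!     std::cout << "Average latency: " << avgLatencyMs << " ms" << std::endl;
//! }
//! \endcode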

//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult
{
    float min{0.F};
    float max{0.F};
    float mean{0.F};
    float median{0.F};
    std::vector<float> percentiles;
    float coeffVar{0.F}; // coefficient of variation (standard deviation / mean)
};

//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os);

//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os);

//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, std::vector<float> const& percentiles, int32_t batchSize,
    std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
    std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles);
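
//!
//! Example (illustrative sketch): computing latency statistics with a lambda
//! metric getter. The `timings` vector and the percentile values are
//! assumptions for illustration only.
//!
//! \code
//! std::vector<float> const percentiles{90.F, 95.F, 99.F};
//! PerformanceResult const latencyStats
//!     = getPerformanceResult(timings, [](InferenceTime const& t) { return t.latency(); }, percentiles);
//! \endcode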

//!
//! \brief Print explanations of the performance metrics reported by printEpilog().
//!
void printMetricExplanations(std::ostream& os);

//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
    InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(
    std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups);

//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);

//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);

//!
//! \brief Dump raw input/output binding data to files
//!
void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);

//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings,
    std::string const& fileName, int32_t batch);

//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile
{
    std::string name;
    std::vector<float> timeMs;
};

//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming layer times are reported in the same order in every iteration
//!
class Profiler : public nvinfer1::IProfiler
{

public:
    void reportLayerTime(char const* layerName, float timeMs) noexcept override;

    void print(std::ostream& os) const noexcept;

    //!
    //! \brief Export a profile to JSON file
    //!
    void exportJSONProfile(std::string const& fileName) const noexcept;

private:
    float getTotalTime() const noexcept
    {
        auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
            return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus<float>());
        };
        return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
    }

    float getMedianTime() const noexcept
    {
        if (mLayers.empty())
        {
            return 0.F;
        }
        std::vector<float> totalTime;
        for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run)
        {
            auto const layerTime
                = [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; };
            auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime);
            totalTime.push_back(t);
        }
        return median(totalTime);
    }

    float getMedianTime(LayerProfile const& p) const noexcept
    {
        return median(p.timeMs);
    }

    static float median(std::vector<float> vals)
    {
        if (vals.empty())
        {
            return 0.F;
        }
        std::sort(vals.begin(), vals.end());
        if (vals.size() % 2U == 1U)
        {
            return vals[vals.size() / 2U];
        }
        return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F;
    }

    //! Return the total runtime of the given layer profile
    float getTotalTime(LayerProfile const& p) const noexcept
    {
        auto const& vals = p.timeMs;
        return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus<float>());
    }

    float getAvgTime(LayerProfile const& p) const noexcept
    {
        return getTotalTime(p) / p.timeMs.size();
    }

    std::vector<LayerProfile> mLayers;
    std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
    int32_t mUpdatesCount{0};
};
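
//!
//! Example (illustrative sketch): attaching a Profiler to an execution context
//! and reporting per-layer times after inference. The `context` pointer and the
//! output file name are assumptions for illustration only.
//!
//! \code
//! Profiler profiler;
//! context->setProfiler(&profiler); // context is an nvinfer1::IExecutionContext*
//! // ... run inference iterations so reportLayerTime() is invoked per layer ...
//! profiler.print(std::cout);                  // human-readable per-layer summary
//! profiler.exportJSONProfile("profile.json"); // per-layer timings as JSON
//! \endcode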

//!
//! \brief Print layer info to logger or export it to output JSON file.
//!
void printLayerInfo(
    ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context);

//!
//! \brief Print optimization profile info to logger.
//!
void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine);

//! Forward declaration.
struct InferenceEnvironmentBase;

//!
//! \brief Print per-layer perf profile data to logger or export it to output JSON file.
//!
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironmentBase& iEnv);

//!
//! \brief Print binding output values to logger or export them to output JSON file.
//!
void printOutput(ReportingOptions const& reporting, InferenceEnvironmentBase const& iEnv, int32_t batch);

} // namespace sample

#endif // TRT_SAMPLE_REPORTING_H