/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H

#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "NvInfer.h"

#include "sampleOptions.h"

namespace sample
{

class BindingsStd;
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime
{
InferenceTime(float q, float i, float c, float o)
: enq(q)
, h2d(i)
, compute(c)
, d2h(o)
{
}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
    // Ideal latency (h2d + compute + d2h; excludes enqueue)
float latency() const
{
return h2d + compute + d2h;
}
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace
{
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe)
: stream(s)
, enqStart(es)
, enqEnd(ee)
, h2dStart(is)
, h2dEnd(ie)
, computeStart(cs)
, computeEnd(ce)
, d2hStart(os)
, d2hEnd(oe)
{
}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b)
{
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h);
}
inline InferenceTime& operator+=(InferenceTime& a, InferenceTime const& b)
{
    a = a + b;
    return a;
}
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult
{
float min{0.F};
float max{0.F};
float mean{0.F};
float median{0.F};
std::vector<float> percentiles;
    float coeffVar{0.F}; // coefficient of variation (stddev / mean)
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, std::vector<float> const& percentiles, int32_t batchSize,
std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles);
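
//
// Usage sketch (illustrative only): extract latency statistics from a set of timings.
// The percentile values and the "timings" variable are assumptions for this example.
//
//   std::vector<float> const percentiles{90.F, 95.F, 99.F};
//   PerformanceResult const latencyStats = getPerformanceResult(
//       timings, [](InferenceTime const& t) { return t.latency(); }, percentiles);
//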
//!
//! \brief Print the explanations of the performance metrics printed by printEpilog().
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(
    std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);
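//!
//! \brief Dump raw binding data to files
//!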
void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings,
std::string const& fileName, int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile
{
std::string name;
std::vector<float> timeMs;
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in the same order
//!
class Profiler : public nvinfer1::IProfiler
{
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept
{
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus<float>());
};
return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
}
float getMedianTime() const noexcept
{
if (mLayers.empty())
{
return 0.F;
}
std::vector<float> totalTime;
for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run)
{
auto const layerTime
= [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; };
auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime);
totalTime.push_back(t);
}
return median(totalTime);
}
float getMedianTime(LayerProfile const& p) const noexcept
{
return median(p.timeMs);
}
static float median(std::vector<float> vals)
{
if (vals.empty())
{
return 0.F;
}
std::sort(vals.begin(), vals.end());
if (vals.size() % 2U == 1U)
{
return vals[vals.size() / 2U];
}
return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F;
}
    //! Return the total runtime of the given layer profile.
float getTotalTime(LayerProfile const& p) const noexcept
{
auto const& vals = p.timeMs;
return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus<float>());
}
float getAvgTime(LayerProfile const& p) const noexcept
{
return getTotalTime(p) / p.timeMs.size();
}
    std::vector<LayerProfile> mLayers;                               //!< Per-layer profiles, in reporting order
    std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};  //!< Next layer expected by reportLayerTime()
int32_t mUpdatesCount{0};
};
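
//
// Usage sketch (illustrative only; assumes an existing nvinfer1::IExecutionContext*
// named "context"): attach the profiler so per-layer times are reported to
// reportLayerTime() while profiling is enabled, then print or export the results.
// The output file name is an assumption for this example.
//
//   sample::Profiler profiler;
//   context->setProfiler(&profiler);
//   // ... run inference ...
//   profiler.print(std::cout);                  // human-readable summary
//   profiler.exportJSONProfile("profile.json"); // or export to JSON
//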
//!
//! \brief Print layer info to logger or export it to output JSON file.
//!
void printLayerInfo(
ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context);
//!
//! \brief Print optimization profile info to logger.
//!
void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine);
//! Forward declaration.
struct InferenceEnvironmentBase;
//!
//! \brief Print per-layer perf profile data to logger or export it to output JSON file.
//!
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironmentBase& iEnv);
//!
//! \brief Print binding output values to logger or export them to output JSON file.
//!
void printOutput(ReportingOptions const& reporting, InferenceEnvironmentBase const& iEnv, int32_t batch);
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H