/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//! sampleINT8API.cpp
//! This file contains an implementation showcasing the INT8 calibration and precision APIs.
//! It creates classification networks such as MobileNet, VGG19, or ResNet-50 from an ONNX model file.
//! This sample showcases setting per-tensor dynamic ranges, overriding calibrator-generated scales where they exist.
//! This sample also showcases how to set the computation precision of a layer, which involves forcing the output
//! tensor type of the layer to a particular precision. It can be run with the following command line:
//! ./sample_int8_api [-h or --help] [--model=model_file] [--ranges=per_tensor_dynamic_range_file]
//! [--image=image_file] [--reference=reference_file] [--data=/path/to/data/dir] [--useDLACore=<id>] [--verbose]
// Define TRT entrypoints used in common code
#define DEFINE_TRT_ENTRYPOINTS 1
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include <cstdlib>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <unordered_map>
#include <vector>
using namespace nvinfer1;
using samplesCommon::SampleUniquePtr;
const std::string gSampleName = "TensorRT.sample_int8_api";
struct SampleINT8APIPreprocessing
{
// Preprocessing values are available here:
// https://github.com/onnx/models/tree/master/models/image_classification/resnet
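// Dimensions are in NCHW order: {batch, channels, height, width}.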
std::vector<int> inputDims{1, 3, 224, 224};
};
//!
//! \brief The SampleINT8APIParams structure groups the additional parameters required by
//! the INT8 API sample
//!
struct SampleINT8APIParams
{
bool verbose{false};
bool writeNetworkTensors{false};
int dlaCore{-1};
SampleINT8APIPreprocessing mPreproc;
std::string modelFileName;
std::vector<std::string> dataDirs;
std::string dynamicRangeFileName;
std::string imageFileName;
std::string referenceFileName;
std::string networkTensorsFileName;
std::string timingCacheFile;
};
//!
//! \brief The SampleINT8API class implements INT8 inference on classification networks.
//!
//! \details Demonstrates INT8 API usage for setting a custom INT8 dynamic range for each network tensor,
//! showing how to perform INT8 inference without a calibration table.
//!
class SampleINT8API
{
private:
template <typename T>
using SampleUniquePtr = std::unique_ptr<T>;
public:
SampleINT8API(const SampleINT8APIParams& params)
: mParams(params)
{
}
//!
//! \brief Builds the network engine
//!
sample::Logger::TestResult build();
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
sample::Logger::TestResult infer();
//!
//! \brief Used to clean up any state created in the sample class
//!
sample::Logger::TestResult teardown();
SampleINT8APIParams mParams; //!< Stores the sample parameters
private:
SampleUniquePtr<IRuntime> mRuntime{}; //!< The TensorRT Runtime used to deserialize the engine.
std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
std::map<std::string, std::string> mInOut; //!< Input and output mapping of the network
nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network
nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network
std::unordered_map<std::string, float>
mPerTensorDynamicRangeMap; //!< Mapping from tensor name to max absolute dynamic range values
void getInputOutputNames(); //!< Populates input and output mapping of the network
//!
//! \brief Reads the ppm input image, preprocesses, and stores the result in a managed buffer
//!
bool prepareInput(const samplesCommon::BufferManager& buffers);
//!
//! \brief Verifies that the output is correct and prints it
//!
bool verifyOutput(const samplesCommon::BufferManager& buffers) const;
//!
//! \brief Populate per-tensor dynamic range values
//!
bool readPerTensorDynamicRangeValues();
//!
//! \brief Sets custom dynamic range for network tensors
//!
bool setDynamicRange(SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
//!
//! \brief Sets computation precision for network layers
//!
void setLayerPrecision(SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
//!
//! \brief Write network tensor names to a file.
//!
void writeNetworkTensorNames(const SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
};
//!
//! \brief Populates input and output mapping of the network
//!
void SampleINT8API::getInputOutputNames()
{
int32_t nbindings = mEngine->getNbIOTensors();
ASSERT(nbindings == 2);
for (int32_t b = 0; b < nbindings; ++b)
{
auto const bindingName = mEngine->getIOTensorName(b);
nvinfer1::Dims dims = mEngine->getTensorShape(bindingName);
if (mEngine->getTensorIOMode(bindingName) == TensorIOMode::kINPUT)
{
if (mParams.verbose)
{
sample::gLogInfo << "Found input: " << bindingName << " shape=" << dims
<< " dtype=" << static_cast<int32_t>(mEngine->getTensorDataType(bindingName))
<< std::endl;
}
mInOut["input"] = bindingName;
}
else
{
if (mParams.verbose)
{
sample::gLogInfo << "Found output: " << bindingName << " shape=" << dims
<< " dtype=" << static_cast<int32_t>(mEngine->getTensorDataType(bindingName))
<< std::endl;
}
mInOut["output"] = bindingName;
}
}
}
//!
//! \brief Populates per-tensor dynamic range values
//!
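//
// The per-tensor scales file is expected to contain one "tensorName: maxAbsoluteValue"
// pair per line, parsed on the ':' delimiter below. For example (tensor names and
// values here are illustrative only):
//
//     data: 2.64
//     resnetv1_conv0_fwd: 10.31
//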
bool SampleINT8API::readPerTensorDynamicRangeValues()
{
std::ifstream iDynamicRangeStream(mParams.dynamicRangeFileName);
if (!iDynamicRangeStream)
{
sample::gLogError << "Could not find per-tensor scales file: " << mParams.dynamicRangeFileName << std::endl;
return false;
}
std::string line;
char delim = ':';
while (std::getline(iDynamicRangeStream, line))
{
std::istringstream iline(line);
std::string token;
std::getline(iline, token, delim);
std::string tensorName = token;
std::getline(iline, token, delim);
float dynamicRange = std::stof(token);
mPerTensorDynamicRangeMap[tensorName] = dynamicRange;
}
return true;
}
//!
//! \brief Sets computation precision for network layers
//!
void SampleINT8API::setLayerPrecision(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
sample::gLogInfo << "Setting Per Layer Computation Precision" << std::endl;
for (int i = 0; i < network->getNbLayers(); ++i)
{
auto layer = network->getLayer(i);
if (mParams.verbose)
{
std::string layerName = layer->getName();
sample::gLogInfo << "Layer: " << layerName << ". Precision: INT8" << std::endl;
}
// Don't set the precision on non-computation layers as they don't support
// int8.
if (layer->getType() != LayerType::kCONSTANT && layer->getType() != LayerType::kCONCATENATION
&& layer->getType() != LayerType::kSHAPE)
{
// set computation precision of the layer
layer->setPrecision(nvinfer1::DataType::kINT8);
}
for (int j = 0; j < layer->getNbOutputs(); ++j)
{
if (mParams.verbose)
{
std::string tensorName = layer->getOutput(j)->getName();
sample::gLogInfo << "Tensor: " << tensorName << ". OutputType: INT8" << std::endl;
}
// set output type of execution tensors and not shape tensors.
if (layer->getOutput(j)->isExecutionTensor())
{
layer->setOutputType(j, nvinfer1::DataType::kINT8);
}
}
}
}
//!
//! \brief Write network tensor names to a file.
//!
void SampleINT8API::writeNetworkTensorNames(const SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
sample::gLogInfo << "Sample requires to run with per-tensor dynamic range." << std::endl;
sample::gLogInfo
<< "In order to run Int8 inference without calibration, user will need to provide dynamic range for all "
"the network tensors."
<< std::endl;
std::ofstream tensorsFile{mParams.networkTensorsFileName};
// Iterate through network inputs to write names of input tensors.
for (int i = 0; i < network->getNbInputs(); ++i)
{
std::string tName = network->getInput(i)->getName();
tensorsFile << "TensorName: " << tName << std::endl;
if (mParams.verbose)
{
sample::gLogInfo << "TensorName: " << tName << std::endl;
}
}
// Iterate through network layers.
for (int i = 0; i < network->getNbLayers(); ++i)
{
// Write output tensors of a layer to the file.
for (int j = 0; j < network->getLayer(i)->getNbOutputs(); ++j)
{
std::string tName = network->getLayer(i)->getOutput(j)->getName();
tensorsFile << "TensorName: " << tName << std::endl;
if (mParams.verbose)
{
sample::gLogInfo << "TensorName: " << tName << std::endl;
}
}
}
tensorsFile.close();
sample::gLogInfo << "Successfully generated network tensor names. Writing: " << mParams.networkTensorsFileName
<< std::endl;
sample::gLogInfo
<< "Use the generated tensor names file to create dynamic range file for Int8 inference. Follow README.md "
"for instructions to generate dynamic_ranges.txt file."
<< std::endl;
}
//!
//! \brief Sets custom dynamic range for network tensors
//!
bool SampleINT8API::setDynamicRange(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
// populate per-tensor dynamic range
if (!readPerTensorDynamicRangeValues())
{
return false;
}
sample::gLogInfo << "Setting Per Tensor Dynamic Range" << std::endl;
if (mParams.verbose)
{
sample::gLogInfo
<< "If the dynamic range for a tensor is missing, TensorRT treats that range as optional and "
"will still attempt to run inference."
<< std::endl;
sample::gLogInfo
<< "If a missing dynamic range turns out to be required, inference will fail. Follow README.md to "
"generate the missing per-tensor dynamic ranges."
<< std::endl;
}
// set dynamic range for network input tensors
for (int i = 0; i < network->getNbInputs(); ++i)
{
std::string tName = network->getInput(i)->getName();
if (mPerTensorDynamicRangeMap.find(tName) != mPerTensorDynamicRangeMap.end())
{
if (!network->getInput(i)->setDynamicRange(
-mPerTensorDynamicRangeMap.at(tName), mPerTensorDynamicRangeMap.at(tName)))
{
return false;
}
}
else
{
if (mParams.verbose)
{
sample::gLogWarning << "Missing dynamic range for tensor: " << tName << std::endl;
}
}
}
// set dynamic range for layer output tensors
for (int i = 0; i < network->getNbLayers(); ++i)
{
auto lyr = network->getLayer(i);
for (int j = 0, e = lyr->getNbOutputs(); j < e; ++j)
{
std::string tName = lyr->getOutput(j)->getName();
if (mPerTensorDynamicRangeMap.find(tName) != mPerTensorDynamicRangeMap.end())
{
// Calibrator-generated dynamic ranges for network tensors can be overridden or set using the API below
if (!lyr->getOutput(j)->setDynamicRange(
-mPerTensorDynamicRangeMap.at(tName), mPerTensorDynamicRangeMap.at(tName)))
{
return false;
}
}
else if (lyr->getType() == LayerType::kCONSTANT)
{
IConstantLayer* cLyr = static_cast<IConstantLayer*>(lyr);
if (mParams.verbose)
{
sample::gLogWarning << "Computing missing dynamic range for tensor, " << tName << ", from weights."
<< std::endl;
}
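// Scan the constant's weights for the largest absolute value; that value defines a
// symmetric dynamic range [-max, max] for the layer's output tensor.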
auto wts = cLyr->getWeights();
double max{0.0}; // Running maximum of |weight|; absolute values are non-negative.
for (int64_t wb = 0, we = wts.count; wb < we; ++wb)
{
double val{};
switch (wts.type)
{
case DataType::kFLOAT: val = static_cast<const float*>(wts.values)[wb]; break;
case DataType::kBOOL: val = static_cast<const bool*>(wts.values)[wb]; break;
case DataType::kINT8: val = static_cast<const int8_t*>(wts.values)[wb]; break;
case DataType::kHALF: val = static_cast<const half_float::half*>(wts.values)[wb]; break;
case DataType::kINT32: val = static_cast<const int32_t*>(wts.values)[wb]; break;
case DataType::kUINT8: val = static_cast<uint8_t const*>(wts.values)[wb]; break;
case DataType::kFP8:
case DataType::kBF16:
case DataType::kINT4:
case DataType::kINT64:
case DataType::kFP4:
case DataType::kE8M0: ASSERT(false && "Unsupported data type");
}
max = std::max(max, std::abs(val));
}
if (!lyr->getOutput(j)->setDynamicRange(-max, max))
{
return false;
}
}
else
{
if (mParams.verbose)
{
sample::gLogWarning << "Missing dynamic range for tensor: " << tName << std::endl;
}
}
}
}
if (mParams.verbose)
{
sample::gLogInfo << "Per Tensor Dynamic Range Values for the Network:" << std::endl;
for (auto iter = mPerTensorDynamicRangeMap.begin(); iter != mPerTensorDynamicRangeMap.end(); ++iter)
sample::gLogInfo << "Tensor: " << iter->first << ". Max Absolute Dynamic Range: " << iter->second
<< std::endl;
}
return true;
}
//!
//! \brief Preprocess inputs and allocate host/device input buffers
//!
bool SampleINT8API::prepareInput(const samplesCommon::BufferManager& buffers)
{
if (samplesCommon::toLower(samplesCommon::getFileType(mParams.imageFileName)).compare("ppm") != 0)
{
sample::gLogError << "Wrong format: " << mParams.imageFileName << " is not a ppm file." << std::endl;
return false;
}
int channels = mParams.mPreproc.inputDims.at(1);
int height = mParams.mPreproc.inputDims.at(2);
int width = mParams.mPreproc.inputDims.at(3);
int max{0};
std::string magic;
std::vector<uint8_t> fileData(channels * height * width);
std::ifstream infile(mParams.imageFileName, std::ifstream::binary);
ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");
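// A binary PPM ("P6") file starts with an ASCII header: the magic string, image width,
// image height, and the maximum pixel value, followed by a single whitespace character and
// then the raw interleaved (HWC) RGB pixel data. The reads below assume a well-formed file
// whose dimensions match the expected network input dimensions.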
infile >> magic >> width >> height >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(fileData.data()), width * height * channels);
float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(mInOut["input"]));
// Convert HWC to CHW and Normalize
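// The affine transform (2/255) * x - 1 maps pixel values from [0, 255] to [-1, 1]:
// 0 -> -1.0f, 127.5 -> 0.0f, 255 -> +1.0f.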
for (int c = 0; c < channels; ++c)
{
for (int h = 0; h < height; ++h)
{
for (int w = 0; w < width; ++w)
{
int dstIdx = c * height * width + h * width + w;
int srcIdx = h * width * channels + w * channels + c;
hostInputBuffer[dstIdx] = (2.0F / 255.0F) * static_cast<float>(fileData[srcIdx]) - 1.0F;
}
}
}
return true;
}
//!
//! \brief Verifies that the output is correct and prints it
//!
bool SampleINT8API::verifyOutput(const samplesCommon::BufferManager& buffers) const
{
// copy output host buffer data for further processing
const float* probPtr = static_cast<const float*>(buffers.getHostBuffer(mInOut.at("output")));
std::vector<float> output(probPtr, probPtr + mOutputDims.d[1]);
auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
// read reference labels to generate prediction labels
std::vector<std::string> referenceVector;
if (!samplesCommon::readReferenceFile(mParams.referenceFileName, referenceVector))
{
sample::gLogError << "Unable to read reference file: " << mParams.referenceFileName << std::endl;
return false;
}
std::vector<std::string> top5Result = samplesCommon::classify(referenceVector, output, 5);
sample::gLogInfo << "SampleINT8API result: Detected:" << std::endl;
for (int i = 1; i <= 5; ++i)
{
sample::gLogInfo << "[" << i << "] " << top5Result[i - 1] << std::endl;
}
return true;
}
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates INT8 classification network by parsing the onnx model and builds
//! the engine that will be used to run INT8 inference (mEngine)
//!
//! \return true if the engine was created successfully and false otherwise
//!
sample::Logger::TestResult SampleINT8API::build()
{
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder)
{
sample::gLogError << "Unable to create builder object." << std::endl;
return sample::Logger::TestResult::kFAILED;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
if (!network)
{
sample::gLogError << "Unable to create network object." << mParams.referenceFileName << std::endl;
return sample::Logger::TestResult::kFAILED;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if (!config)
{
sample::gLogError << "Unable to create config object." << mParams.referenceFileName << std::endl;
return sample::Logger::TestResult::kFAILED;
}
auto parser
= SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
if (!parser)
{
sample::gLogError << "Unable to create parser object." << mParams.referenceFileName << std::endl;
return sample::Logger::TestResult::kFAILED;
}
// Parse ONNX model file to populate TensorRT INetwork
int verbosity = static_cast<int>(nvinfer1::ILogger::Severity::kERROR);
if (!parser->parseFromFile(mParams.modelFileName.c_str(), verbosity))
{
sample::gLogError << "Unable to parse ONNX model file: " << mParams.modelFileName << std::endl;
return sample::Logger::TestResult::kFAILED;
}
if (mParams.writeNetworkTensors)
{
writeNetworkTensorNames(network);
return sample::Logger::TestResult::kWAIVED;
}
// Configure builder
config->setFlag(BuilderFlag::kGPU_FALLBACK);
// Enable INT8 mode. Required for setting custom per-tensor dynamic ranges or for INT8 calibration
config->setFlag(BuilderFlag::kINT8);
// Set the calibrator to null. Since the user provides a dynamic range for each tensor, no calibrator is required
config->setInt8Calibrator(nullptr);
// force layer to execute with required precision
setLayerPrecision(network);
// set INT8 Per Tensor Dynamic range
if (!setDynamicRange(network))
{
sample::gLogError << "Unable to set per-tensor dynamic range." << std::endl;
return sample::Logger::TestResult::kFAILED;
}
// CUDA stream used for profiling by the builder.
auto profileStream = samplesCommon::makeCudaStream();
if (!profileStream)
{
return sample::Logger::TestResult::kFAILED;
}
config->setProfileStream(*profileStream);
SampleUniquePtr<nvinfer1::ITimingCache> timingCache;
if (!mParams.timingCacheFile.empty())
{
timingCache
= samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
}
SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
if (!plan)
{
sample::gLogError << "Unable to build serialized plan." << std::endl;
return sample::Logger::TestResult::kFAILED;
}
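// The serialized plan is a self-contained engine blob; it could also be written to disk and
// later reloaded with IRuntime::deserializeCudaEngine(), as done below.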
if (timingCache != nullptr && !mParams.timingCacheFile.empty())
{
samplesCommon::updateTimingCacheFile(
sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
}
if (!mRuntime)
{
mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
}
if (!mRuntime)
{
sample::gLogError << "Unable to create runtime." << std::endl;
return sample::Logger::TestResult::kFAILED;
}
// build TRT engine
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
if (!mEngine)
{
sample::gLogError << "Unable to build cuda engine." << std::endl;
return sample::Logger::TestResult::kFAILED;
}
// populates input output map structure
getInputOutputNames();
mInputDims = mEngine->getTensorShape(mInOut["input"].c_str());
mOutputDims = mEngine->getTensorShape(mInOut["output"].c_str());
return sample::Logger::TestResult::kRUNNING;
}
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates
//! the buffer, sets inputs, executes the engine, and verifies the output
//!
sample::Logger::TestResult SampleINT8API::infer()
{
// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine);
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
return sample::Logger::TestResult::kFAILED;
}
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
auto const name = mEngine->getIOTensorName(i);
context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}
// Read the input data into the managed buffers
// There should be just 1 input tensor
if (!prepareInput(buffers))
{
return sample::Logger::TestResult::kFAILED;
}
// Create CUDA stream for the execution of this inference
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// Asynchronously copy data from host input buffers to device input buffers
buffers.copyInputToDeviceAsync(stream);
// Asynchronously enqueue the inference work
if (!context->enqueueV3(stream))
{
return sample::Logger::TestResult::kFAILED;
}
// Asynchronously copy data from device output buffers to host output buffers
buffers.copyOutputToHostAsync(stream);
// Wait for the work in the stream to complete
CHECK(cudaStreamSynchronize(stream));
// Release stream
CHECK(cudaStreamDestroy(stream));
// Check and print the output of the inference
return verifyOutput(buffers) ? sample::Logger::TestResult::kRUNNING : sample::Logger::TestResult::kFAILED;
}
//!
//! \brief Used to clean up any state created in the sample class
//!
sample::Logger::TestResult SampleINT8API::teardown()
{
return sample::Logger::TestResult::kRUNNING;
}
//!
//! \brief The SampleINT8APIArgs structure groups the additional arguments required by
//! the INT8 API sample
//!
struct SampleINT8APIArgs : public samplesCommon::Args
{
bool verbose{false};
bool writeNetworkTensors{false};
std::string modelFileName{"resnet50.onnx"};
std::string imageFileName{"airliner.ppm"};
std::string referenceFileName{"reference_labels.txt"};
std::string dynamicRangeFileName{"resnet50_per_tensor_dynamic_range.txt"};
std::string networkTensorsFileName{"network_tensors.txt"};
};
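// Example invocation using the defaults above (all paths are illustrative):
//
//     ./sample_int8_api --model=resnet50.onnx --image=airliner.ppm
//         --reference=reference_labels.txt --ranges=resnet50_per_tensor_dynamic_range.txt
//         --data=data/int8_api/ --verbose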
//!
//! \brief This function parses arguments specific to SampleINT8API
//!
bool parseSampleINT8APIArgs(SampleINT8APIArgs& args, int argc, char* argv[])
{
for (int i = 1; i < argc; ++i)
{
if (!strncmp(argv[i], "--model=", 8))
{
args.modelFileName = (argv[i] + 8);
}
else if (!strncmp(argv[i], "--image=", 8))
{
args.imageFileName = (argv[i] + 8);
}
else if (!strncmp(argv[i], "--reference=", 12))
{
args.referenceFileName = (argv[i] + 12);
}
else if (!strncmp(argv[i], "--write_tensors", 15))
{
args.writeNetworkTensors = true;
}
else if (!strncmp(argv[i], "--network_tensors_file=", 23))
{
args.networkTensorsFileName = (argv[i] + 23);
}
else if (!strncmp(argv[i], "--ranges=", 9))
{
args.dynamicRangeFileName = (argv[i] + 9);
}
else if (!strncmp(argv[i], "--int8", 6))
{
args.runInInt8 = true;
}
else if (!strncmp(argv[i], "--fp16", 6))
{
args.runInFp16 = true;
}
else if (!strncmp(argv[i], "--useDLACore=", 13))
{
args.useDLACore = std::stoi(argv[i] + 13);
}
else if (!strncmp(argv[i], "--data=", 7))
{
std::string dirPath = (argv[i] + 7);
if (dirPath.back() != '/')
{
dirPath.push_back('/');
}
args.dataDirs.push_back(dirPath);
}
else if (!strncmp(argv[i], "--timingCacheFile=", 18))
{
args.timingCacheFile = (argv[i] + 18);
}
else if (!strncmp(argv[i], "--verbose", 9) || !strncmp(argv[i], "-v", 2))
{
args.verbose = true;
}
else if (!strncmp(argv[i], "--help", 6) || !strncmp(argv[i], "-h", 2))
{
args.help = true;
}
else
{
sample::gLogError << "Invalid Argument: " << argv[i] << std::endl;
return false;
}
}
return true;
}
void validateInputParams(SampleINT8APIParams& params)
{
sample::gLogInfo << "Please follow README.md to generate missing input files." << std::endl;
sample::gLogInfo << "Validating input parameters. Using following input files for inference." << std::endl;
params.modelFileName = samplesCommon::locateFile(params.modelFileName, params.dataDirs);
sample::gLogInfo << " Model File: " << params.modelFileName << std::endl;
if (params.writeNetworkTensors)
{
sample::gLogInfo << " Writing Network Tensors File to: " << params.networkTensorsFileName << std::endl;
return;
}
params.imageFileName = samplesCommon::locateFile(params.imageFileName, params.dataDirs);
sample::gLogInfo << " Image File: " << params.imageFileName << std::endl;
params.referenceFileName = samplesCommon::locateFile(params.referenceFileName, params.dataDirs);
sample::gLogInfo << " Reference File: " << params.referenceFileName << std::endl;
params.dynamicRangeFileName = samplesCommon::locateFile(params.dynamicRangeFileName, params.dataDirs);
sample::gLogInfo << " Dynamic Range File: " << params.dynamicRangeFileName << std::endl;
return;
}
//!
//! \brief This function initializes members of the params struct using the command line args
//!
SampleINT8APIParams initializeSampleParams(SampleINT8APIArgs args)
{
SampleINT8APIParams params;
if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths
{
params.dataDirs.push_back("data/samples/int8_api/");
params.dataDirs.push_back("data/int8_api/");
}
else // Use the data directory provided by the user
{
params.dataDirs = args.dataDirs;
}
params.dataDirs.push_back(""); // In case of absolute path search
params.verbose = args.verbose;
params.modelFileName = args.modelFileName;
params.imageFileName = args.imageFileName;
params.referenceFileName = args.referenceFileName;
params.dynamicRangeFileName = args.dynamicRangeFileName;
params.dlaCore = args.useDLACore;
params.writeNetworkTensors = args.writeNetworkTensors;
params.networkTensorsFileName = args.networkTensorsFileName;
params.timingCacheFile = args.timingCacheFile;
validateInputParams(params);
return params;
}
//!
//! \brief This function prints the help information for running this sample
//!
void printHelpInfo()
{
std::cout << "Usage: ./sample_int8_api [-h or --help] [--model=model_file] "
"[--ranges=per_tensor_dynamic_range_file] [--image=image_file] [--reference=reference_file] "
"[--data=/path/to/data/dir] [--useDLACore=<int>] [-v or --verbose] "
"[--timingCacheFile=timing_cache_file]\n";
std::cout << "-h or --help. Display This help information" << std::endl;
std::cout << "--model=model_file.onnx or /absolute/path/to/model_file.onnx. Generate model file using README.md in "
"case it does not exists. Default to resnet50.onnx"
<< std::endl;
std::cout << "--image=image.ppm or /absolute/path/to/image.ppm. Image to infer. Defaults to airlines.ppm"
<< std::endl;
std::cout << "--reference=reference.txt or /absolute/path/to/reference.txt. Reference labels file. Defaults to "
"reference_labels.txt"
<< std::endl;
std::cout << "--ranges=ranges.txt or /absolute/path/to/ranges.txt. Specify custom per-tensor dynamic range for the "
"network. Defaults to resnet50_per_tensor_dynamic_range.txt"
<< std::endl;
std::cout << "--write_tensors. Option to generate file containing network tensors name. By default writes to "
"network_tensors.txt file. To provide user defined file name use additional option "
"--network_tensors_file. See --network_tensors_file option usage for more detail."
<< std::endl;
std::cout << "--network_tensors_file=network_tensors.txt or /absolute/path/to/network_tensors.txt. This option "
"needs to be used with --write_tensors option. Specify file name (will write to current execution "
"directory) or absolute path to file name to write network tensor names file. Dynamic range "
"corresponding to each network tensor is required to run the sample. Defaults to network_tensors.txt"
<< std::endl;
std::cout << "--data=/path/to/data/dir. Specify data directory to search for above files in case absolute paths to "
"files are not provided. Defaults to data/samples/int8_api/ or data/int8_api/"
<< std::endl;
std::cout << "--useDLACore=N. Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
"where n is the number of DLA engines on the platform."
<< std::endl;
std::cout << "--timingCacheFile=functional.cache or /absolute/path/to/functional.cache. Specify path for timing "
"cache file. If it does not already exist, it will be created. Defaults to not using a timing cache."
<< std::endl;
std::cout << "--verbose. Outputs per-tensor dynamic range and layer precision info for the network" << std::endl;
}
int main(int argc, char** argv)
{
SampleINT8APIArgs args;
bool argsOK = parseSampleINT8APIArgs(args, argc, argv);
if (!argsOK)
{
sample::gLogError << "Invalid arguments" << std::endl;
printHelpInfo();
return EXIT_FAILURE;
}
if (args.help)
{
printHelpInfo();
return EXIT_SUCCESS;
}
if (args.verbose)
{
sample::gLogger.setReportableSeverity(nvinfer1::ILogger::Severity::kVERBOSE);
}
auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
sample::gLogger.reportTestStart(sampleTest);
SampleINT8APIParams params = initializeSampleParams(args);
SampleINT8API sample(params);
sample::gLogInfo << "Building and running a INT8 GPU inference engine for " << params.modelFileName << std::endl;
auto buildStatus = sample.build();
if (buildStatus == sample::Logger::TestResult::kWAIVED)
{
return sample::gLogger.reportWaive(sampleTest);
}
else if (buildStatus == sample::Logger::TestResult::kFAILED)
{
return sample::gLogger.reportFail(sampleTest);
}
if (sample.infer() != sample::Logger::TestResult::kRUNNING)
{
return sample::gLogger.reportFail(sampleTest);
}
if (sample.teardown() != sample::Logger::TestResult::kRUNNING)
{
return sample::gLogger.reportFail(sampleTest);
}
return sample::gLogger.reportPass(sampleTest);
}
|