/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
//!
//! sampleINT8API.cpp
//! This file contains the implementation showcasing usage of the INT8 calibration and precision APIs.
//! It creates classification networks such as MobileNet, VGG19, and ResNet-50 from an ONNX model file.
//! This sample showcases setting per-tensor dynamic ranges, overriding calibrator-generated scales if they exist.
//! This sample also showcases how to set the computation precision of a layer. It involves forcing the output tensor
//! type of the layer to a particular precision. It can be run with the following command line:
//! ./sample_int8_api [-h or --help] [--model=model_file] [--ranges=per_tensor_dynamic_range_file]
//! [--image=image_file] [--reference=reference_file] [--data=/path/to/data/dir] [--verbose] [--useDLACore=<id>]
//!

// Define TRT entrypoints used in common code
#define DEFINE_TRT_ENTRYPOINTS 1
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0

#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"

#include "NvInfer.h"
#include "NvOnnxParser.h"

#include <cstdlib>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace nvinfer1;
using samplesCommon::SampleUniquePtr;

const std::string gSampleName = "TensorRT.sample_int8_api";

struct SampleINT8APIPreprocessing
{
    // Preprocessing values are available here:
    // https://github.com/onnx/models/tree/master/models/image_classification/resnet
    std::vector<int> inputDims{1, 3, 224, 224};
};

//!
//! \brief The SampleINT8APIParams structure groups the additional parameters required by
//!        the INT8 API sample
//!
struct SampleINT8APIParams
{
    bool verbose{false};
    bool writeNetworkTensors{false};
    int dlaCore{-1};

    SampleINT8APIPreprocessing mPreproc;
    std::string modelFileName;
    std::vector<std::string> dataDirs;
    std::string dynamicRangeFileName;
    std::string imageFileName;
    std::string referenceFileName;
    std::string networkTensorsFileName;
    std::string timingCacheFile;
};

//!
//! \brief The SampleINT8API class implements INT8 inference on classification networks.
//!
//! \details This class demonstrates INT8 API usage for setting custom INT8 ranges for network tensors,
//!          and showcases how to perform INT8 inference without a calibration table.
//!
class SampleINT8API
{
private:
    template <typename T>
    using SampleUniquePtr = std::unique_ptr<T>;

public:
    SampleINT8API(const SampleINT8APIParams& params)
        : mParams(params)
    {
    }

    //!
    //! \brief Builds the network engine
    //!
    sample::Logger::TestResult build();

    //!
    //! \brief Runs the TensorRT inference engine for this sample
    //!
    sample::Logger::TestResult infer();

    //!
    //! \brief Used to clean up any state created in the sample class
    //!
    sample::Logger::TestResult teardown();

    SampleINT8APIParams mParams; //!< Stores the sample parameters

private:
    SampleUniquePtr<IRuntime> mRuntime{}; //!< The TensorRT Runtime used to deserialize the engine.

    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network

    std::map<std::string, std::string> mInOut; //!< Input and output mapping of the network

    nvinfer1::Dims mInputDims;  //!< The dimensions of the input to the network
    nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network

    std::unordered_map<std::string, float>
        mPerTensorDynamicRangeMap; //!< Mapping from tensor name to max absolute dynamic range values

    void getInputOutputNames(); //!< Populates input and output mapping of the network

    //!
    //! \brief Reads the ppm input image, preprocesses, and stores the result in a managed buffer
    //!
    bool prepareInput(const samplesCommon::BufferManager& buffers);

    //!
    //! \brief Verifies that the output is correct and prints it
    //!
    bool verifyOutput(const samplesCommon::BufferManager& buffers) const;

    //!
    //! \brief Populates per-tensor dynamic range values
    //!
    bool readPerTensorDynamicRangeValues();

    //!
    //! \brief Sets custom dynamic range for network tensors
    //!
    bool setDynamicRange(SampleUniquePtr<nvinfer1::INetworkDefinition>& network);

    //!
    //! \brief Sets computation precision for network layers
    //!
    void setLayerPrecision(SampleUniquePtr<nvinfer1::INetworkDefinition>& network);

    //!
    //! \brief Writes network tensor names to a file.
    //!
    void writeNetworkTensorNames(const SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
};

//!
//! \brief Populates input and output mapping of the network
//!
void SampleINT8API::getInputOutputNames()
{
    int32_t nbindings = mEngine->getNbIOTensors();
    ASSERT(nbindings == 2);
    for (int32_t b = 0; b < nbindings; ++b)
    {
        auto const bindingName = mEngine->getIOTensorName(b);
        nvinfer1::Dims dims = mEngine->getTensorShape(bindingName);
        if (mEngine->getTensorIOMode(bindingName) == TensorIOMode::kINPUT)
        {
            if (mParams.verbose)
            {
                sample::gLogInfo << "Found input: " << bindingName << " shape=" << dims
                                 << " dtype=" << static_cast<int32_t>(mEngine->getTensorDataType(bindingName))
                                 << std::endl;
            }
            mInOut["input"] = bindingName;
        }
        else
        {
            if (mParams.verbose)
            {
                sample::gLogInfo << "Found output: " << bindingName << " shape=" << dims
                                 << " dtype=" << static_cast<int32_t>(mEngine->getTensorDataType(bindingName))
                                 << std::endl;
            }
            mInOut["output"] = bindingName;
        }
    }
}

//!
//! \brief Populates per-tensor dynamic range values
//!
bool SampleINT8API::readPerTensorDynamicRangeValues()
{
    std::ifstream iDynamicRangeStream(mParams.dynamicRangeFileName);
    if (!iDynamicRangeStream)
    {
        sample::gLogError << "Could not find per-tensor scales file: " << mParams.dynamicRangeFileName << std::endl;
        return false;
    }

    std::string line;
    char delim = ':';
    while (std::getline(iDynamicRangeStream, line))
    {
        std::istringstream iline(line);
        std::string token;
        std::getline(iline, token, delim);
        std::string tensorName = token;
        std::getline(iline, token, delim);
        float dynamicRange = std::stof(token);
        mPerTensorDynamicRangeMap[tensorName] = dynamicRange;
    }
    return true;
}
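
// The dynamic range file is expected to hold one "tensorName: maxAbsValue" pair per
// line, matching the ':' delimiter parsed above, e.g. (hypothetical tensor names and
// values):
//   data: 2.64
//   resnetv17_conv0_fwd: 9.35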

//!
//! \brief Sets computation precision for network layers
//!
void SampleINT8API::setLayerPrecision(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    sample::gLogInfo << "Setting Per Layer Computation Precision" << std::endl;
    for (int i = 0; i < network->getNbLayers(); ++i)
    {
        auto layer = network->getLayer(i);
        if (mParams.verbose)
        {
            std::string layerName = layer->getName();
            sample::gLogInfo << "Layer: " << layerName << ". Precision: INT8" << std::endl;
        }

        // Don't set the precision on non-computation layers as they don't support
        // int8.
        if (layer->getType() != LayerType::kCONSTANT && layer->getType() != LayerType::kCONCATENATION
            && layer->getType() != LayerType::kSHAPE)
        {
            // set computation precision of the layer
            layer->setPrecision(nvinfer1::DataType::kINT8);
        }

        for (int j = 0; j < layer->getNbOutputs(); ++j)
        {
            std::string tensorName = layer->getOutput(j)->getName();
            if (mParams.verbose)
            {
                sample::gLogInfo << "Tensor: " << tensorName << ". OutputType: INT8" << std::endl;
            }
            // Set the output type of execution tensors, but not of shape tensors.
            if (layer->getOutput(j)->isExecutionTensor())
            {
                layer->setOutputType(j, nvinfer1::DataType::kINT8);
            }
        }
    }
}
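
// Note: setPrecision() requests the layer's computation precision, while setOutputType()
// constrains the type of that layer's output tensor. Depending on the TensorRT version,
// the builder may treat these only as preferences unless a precision-constraints builder
// flag (e.g. BuilderFlag::kOBEY_PRECISION_CONSTRAINTS) is also set.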

//!
//! \brief Writes network tensor names to a file.
//!
void SampleINT8API::writeNetworkTensorNames(const SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    sample::gLogInfo << "This sample requires per-tensor dynamic ranges to run." << std::endl;
    sample::gLogInfo << "In order to run INT8 inference without calibration, the user needs to provide dynamic "
                        "ranges for all the network tensors."
                     << std::endl;

    std::ofstream tensorsFile{mParams.networkTensorsFileName};

    // Iterate through network inputs to write names of input tensors.
    for (int i = 0; i < network->getNbInputs(); ++i)
    {
        std::string tName = network->getInput(i)->getName();
        tensorsFile << "TensorName: " << tName << std::endl;
        if (mParams.verbose)
        {
            sample::gLogInfo << "TensorName: " << tName << std::endl;
        }
    }

    // Iterate through network layers.
    for (int i = 0; i < network->getNbLayers(); ++i)
    {
        // Write output tensors of a layer to the file.
        for (int j = 0; j < network->getLayer(i)->getNbOutputs(); ++j)
        {
            std::string tName = network->getLayer(i)->getOutput(j)->getName();
            tensorsFile << "TensorName: " << tName << std::endl;
            if (mParams.verbose)
            {
                sample::gLogInfo << "TensorName: " << tName << std::endl;
            }
        }
    }
    tensorsFile.close();

    sample::gLogInfo << "Successfully generated network tensor names. Writing to file: "
                     << mParams.networkTensorsFileName << std::endl;
    sample::gLogInfo << "Use the generated tensor names file to create a dynamic range file for INT8 inference. "
                        "Follow README.md for instructions to generate the dynamic_ranges.txt file."
                     << std::endl;
}
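
// The generated file lists one tensor per line, e.g. (names depend on the model):
//   TensorName: data
//   TensorName: resnetv17_conv0_fwd
// Each listed tensor then needs a corresponding "name: maxAbsValue" entry in the
// dynamic range file; see README.md.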

//!
//! \brief Sets custom dynamic range for network tensors
//!
bool SampleINT8API::setDynamicRange(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    // populate per-tensor dynamic range
    if (!readPerTensorDynamicRangeValues())
    {
        return false;
    }

    sample::gLogInfo << "Setting Per Tensor Dynamic Range" << std::endl;
    if (mParams.verbose)
    {
        sample::gLogInfo << "If the dynamic range for a tensor is missing, TensorRT runs inference treating that "
                            "range as optional."
                         << std::endl;
        sample::gLogInfo << "If the dynamic range for a tensor is required, inference will fail. Follow README.md "
                            "to generate the missing per-tensor dynamic range."
                         << std::endl;
    }

    // set dynamic range for network input tensors
    for (int i = 0; i < network->getNbInputs(); ++i)
    {
        std::string tName = network->getInput(i)->getName();
        if (mPerTensorDynamicRangeMap.find(tName) != mPerTensorDynamicRangeMap.end())
        {
            if (!network->getInput(i)->setDynamicRange(
                    -mPerTensorDynamicRangeMap.at(tName), mPerTensorDynamicRangeMap.at(tName)))
            {
                return false;
            }
        }
        else
        {
            if (mParams.verbose)
            {
                sample::gLogWarning << "Missing dynamic range for tensor: " << tName << std::endl;
            }
        }
    }

    // set dynamic range for layer output tensors
    for (int i = 0; i < network->getNbLayers(); ++i)
    {
        auto lyr = network->getLayer(i);
        for (int j = 0, e = lyr->getNbOutputs(); j < e; ++j)
        {
            std::string tName = lyr->getOutput(j)->getName();
            if (mPerTensorDynamicRangeMap.find(tName) != mPerTensorDynamicRangeMap.end())
            {
                // A calibrator-generated dynamic range for a network tensor can be overridden or set using the API
                // below.
                if (!lyr->getOutput(j)->setDynamicRange(
                        -mPerTensorDynamicRangeMap.at(tName), mPerTensorDynamicRangeMap.at(tName)))
                {
                    return false;
                }
            }
            else if (lyr->getType() == LayerType::kCONSTANT)
            {
                IConstantLayer* cLyr = static_cast<IConstantLayer*>(lyr);
                if (mParams.verbose)
                {
                    sample::gLogWarning << "Computing missing dynamic range for tensor, " << tName << ", from weights."
                                        << std::endl;
                }
                auto wts = cLyr->getWeights();
                // std::numeric_limits<double>::min() is the smallest positive double, so the
                // resulting range is never exactly zero, even for an all-zero weight tensor.
                double max = std::numeric_limits<double>::min();
                for (int64_t wb = 0, we = wts.count; wb < we; ++wb)
                {
                    double val{};
                    switch (wts.type)
                    {
                    case DataType::kFLOAT: val = static_cast<const float*>(wts.values)[wb]; break;
                    case DataType::kBOOL: val = static_cast<const bool*>(wts.values)[wb]; break;
                    case DataType::kINT8: val = static_cast<const int8_t*>(wts.values)[wb]; break;
                    case DataType::kHALF: val = static_cast<const half_float::half*>(wts.values)[wb]; break;
                    case DataType::kINT32: val = static_cast<const int32_t*>(wts.values)[wb]; break;
                    case DataType::kUINT8: val = static_cast<uint8_t const*>(wts.values)[wb]; break;
                    case DataType::kFP8:
                    case DataType::kBF16:
                    case DataType::kINT4:
                    case DataType::kINT64:
                    case DataType::kFP4:
                    case DataType::kE8M0: ASSERT(false && "Unsupported data type");
                    }
                    max = std::max(max, std::abs(val));
                }

                if (!lyr->getOutput(j)->setDynamicRange(-max, max))
                {
                    return false;
                }
            }
            else
            {
                if (mParams.verbose)
                {
                    sample::gLogWarning << "Missing dynamic range for tensor: " << tName << std::endl;
                }
            }
        }
    }

    if (mParams.verbose)
    {
        sample::gLogInfo << "Per Tensor Dynamic Range Values for the Network:" << std::endl;
        for (auto iter = mPerTensorDynamicRangeMap.begin(); iter != mPerTensorDynamicRangeMap.end(); ++iter)
        {
            sample::gLogInfo << "Tensor: " << iter->first << ". Max Absolute Dynamic Range: " << iter->second
                             << std::endl;
        }
    }
    return true;
}
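
// A note on scale: TensorRT uses symmetric quantization, so a dynamic range of
// [-max, max] corresponds to an int8 scale of roughly max / 127.0f.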

//!
//! \brief Preprocesses inputs and allocates host/device input buffers
//!
bool SampleINT8API::prepareInput(const samplesCommon::BufferManager& buffers)
{
    if (samplesCommon::toLower(samplesCommon::getFileType(mParams.imageFileName)).compare("ppm") != 0)
    {
        sample::gLogError << "Wrong format: " << mParams.imageFileName << " is not a ppm file." << std::endl;
        return false;
    }

    int channels = mParams.mPreproc.inputDims.at(1);
    int height = mParams.mPreproc.inputDims.at(2);
    int width = mParams.mPreproc.inputDims.at(3);
    int max{0};
    std::string magic;

    std::vector<uint8_t> fileData(channels * height * width);

    std::ifstream infile(mParams.imageFileName, std::ifstream::binary);
    ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");
    infile >> magic >> width >> height >> max;
    // Guard against a PPM header whose dimensions disagree with the expected network
    // input dims; fileData was sized from the expected dims above.
    ASSERT(width == mParams.mPreproc.inputDims.at(3) && height == mParams.mPreproc.inputDims.at(2));
    infile.seekg(1, infile.cur);
    infile.read(reinterpret_cast<char*>(fileData.data()), width * height * channels);

    float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(mInOut["input"]));

    // Convert HWC to CHW and Normalize
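    // Each byte x in [0, 255] is mapped to (2 / 255) * x - 1, i.e. into [-1, 1].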
    for (int c = 0; c < channels; ++c)
    {
        for (int h = 0; h < height; ++h)
        {
            for (int w = 0; w < width; ++w)
            {
                int dstIdx = c * height * width + h * width + w;
                int srcIdx = h * width * channels + w * channels + c;
                hostInputBuffer[dstIdx] = (2.0F / 255.0F) * static_cast<float>(fileData[srcIdx]) - 1.0F;
            }
        }
    }
    return true;
}

//!
//! \brief Verifies that the output is correct and prints it
//!
bool SampleINT8API::verifyOutput(const samplesCommon::BufferManager& buffers) const
{
    // copy output host buffer data for further processing
    const float* probPtr = static_cast<const float*>(buffers.getHostBuffer(mInOut.at("output")));
    std::vector<float> output(probPtr, probPtr + mOutputDims.d[1]);

    // read reference labels to generate prediction labels
    std::vector<std::string> referenceVector;
    if (!samplesCommon::readReferenceFile(mParams.referenceFileName, referenceVector))
    {
        sample::gLogError << "Unable to read reference file: " << mParams.referenceFileName << std::endl;
        return false;
    }

    std::vector<std::string> top5Result = samplesCommon::classify(referenceVector, output, 5);

    sample::gLogInfo << "SampleINT8API result: Detected:" << std::endl;
    for (int i = 1; i <= 5; ++i)
    {
        sample::gLogInfo << "[" << i << "] " << top5Result[i - 1] << std::endl;
    }
    return true;
}

//!
//! \brief Creates the network, configures the builder, and creates the network engine
//!
//! \details This function creates an INT8 classification network by parsing the ONNX model and builds
//!          the engine that will be used to run INT8 inference (mEngine)
//!
//! \return kRUNNING if the engine was created successfully, kWAIVED if only writing network tensors,
//!         and kFAILED otherwise
//!
sample::Logger::TestResult SampleINT8API::build()
{
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        sample::gLogError << "Unable to create builder object." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
    if (!network)
    {
        sample::gLogError << "Unable to create network object." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        sample::gLogError << "Unable to create config object." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    auto parser
        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
    if (!parser)
    {
        sample::gLogError << "Unable to create parser object." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    // Parse ONNX model file to populate TensorRT INetwork
    int32_t verbosity = static_cast<int32_t>(nvinfer1::ILogger::Severity::kERROR);
    if (!parser->parseFromFile(mParams.modelFileName.c_str(), verbosity))
    {
        sample::gLogError << "Unable to parse ONNX model file: " << mParams.modelFileName << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    if (mParams.writeNetworkTensors)
    {
        writeNetworkTensorNames(network);
        return sample::Logger::TestResult::kWAIVED;
    }

    // Configure builder
    config->setFlag(BuilderFlag::kGPU_FALLBACK);

    // Enable INT8 mode. Required to set custom per-tensor dynamic range or INT8 Calibration
    config->setFlag(BuilderFlag::kINT8);
    // Mark the calibrator as null. As the user provides a dynamic range for each tensor, no calibrator is required.
    config->setInt8Calibrator(nullptr);

    // force layer to execute with required precision
    setLayerPrecision(network);

    // set INT8 Per Tensor Dynamic range
    if (!setDynamicRange(network))
    {
        sample::gLogError << "Unable to set per-tensor dynamic range." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    // CUDA stream used for profiling by the builder.
    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return sample::Logger::TestResult::kFAILED;
    }
    config->setProfileStream(*profileStream);

    SampleUniquePtr<nvinfer1::ITimingCache> timingCache;
    if (!mParams.timingCacheFile.empty())
    {
        timingCache
            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
    }

    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        sample::gLogError << "Unable to build serialized plan." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
    {
        samplesCommon::updateTimingCacheFile(
            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
    }

    if (!mRuntime)
    {
        mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
    }
    if (!mRuntime)
    {
        sample::gLogError << "Unable to create runtime." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    // build TRT engine
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        sample::gLogError << "Unable to build cuda engine." << std::endl;
        return sample::Logger::TestResult::kFAILED;
    }

    // populate the input and output map structure
    getInputOutputNames();

    mInputDims = mEngine->getTensorShape(mInOut["input"].c_str());
    mOutputDims = mEngine->getTensorShape(mInOut["output"].c_str());
    return sample::Logger::TestResult::kRUNNING;
}

//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates
//!          the buffer, sets inputs, executes the engine, and verifies the output
//!
sample::Logger::TestResult SampleINT8API::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine);

    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return sample::Logger::TestResult::kFAILED;
    }

    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
    {
        auto const name = mEngine->getIOTensorName(i);
        context->setTensorAddress(name, buffers.getDeviceBuffer(name));
    }

    // Read the input data into the managed buffers
    // There should be just 1 input tensor
    if (!prepareInput(buffers))
    {
        return sample::Logger::TestResult::kFAILED;
    }

    // Create CUDA stream for the execution of this inference
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // Asynchronously copy data from host input buffers to device input buffers
    buffers.copyInputToDeviceAsync(stream);

    // Asynchronously enqueue the inference work
    if (!context->enqueueV3(stream))
    {
        return sample::Logger::TestResult::kFAILED;
    }

    // Asynchronously copy data from device output buffers to host output buffers
    buffers.copyOutputToHostAsync(stream);

    // Wait for the work in the stream to complete
    CHECK(cudaStreamSynchronize(stream));

    // Release stream
    CHECK(cudaStreamDestroy(stream));

    // Check and print the output of the inference
    return verifyOutput(buffers) ? sample::Logger::TestResult::kRUNNING : sample::Logger::TestResult::kFAILED;
}

//!
//! \brief Used to clean up any state created in the sample class
//!
sample::Logger::TestResult SampleINT8API::teardown()
{
    return sample::Logger::TestResult::kRUNNING;
}

//!
//! \brief The SampleINT8APIArgs structure groups the additional arguments required by
//!        the INT8 API sample
//!
struct SampleINT8APIArgs : public samplesCommon::Args
{
    bool verbose{false};
    bool writeNetworkTensors{false};
    std::string modelFileName{"resnet50.onnx"};
    std::string imageFileName{"airliner.ppm"};
    std::string referenceFileName{"reference_labels.txt"};
    std::string dynamicRangeFileName{"resnet50_per_tensor_dynamic_range.txt"};
    std::string networkTensorsFileName{"network_tensors.txt"};
};

//!
//! \brief This function parses arguments specific to SampleINT8API
//!
bool parseSampleINT8APIArgs(SampleINT8APIArgs& args, int argc, char* argv[])
{
    for (int i = 1; i < argc; ++i)
    {
        if (!strncmp(argv[i], "--model=", 8))
        {
            args.modelFileName = (argv[i] + 8);
        }
        else if (!strncmp(argv[i], "--image=", 8))
        {
            args.imageFileName = (argv[i] + 8);
        }
        else if (!strncmp(argv[i], "--reference=", 12))
        {
            args.referenceFileName = (argv[i] + 12);
        }
        else if (!strncmp(argv[i], "--write_tensors", 15))
        {
            args.writeNetworkTensors = true;
        }
        else if (!strncmp(argv[i], "--network_tensors_file=", 23))
        {
            args.networkTensorsFileName = (argv[i] + 23);
        }
        else if (!strncmp(argv[i], "--ranges=", 9))
        {
            args.dynamicRangeFileName = (argv[i] + 9);
        }
        else if (!strncmp(argv[i], "--int8", 6))
        {
            args.runInInt8 = true;
        }
        else if (!strncmp(argv[i], "--fp16", 6))
        {
            args.runInFp16 = true;
        }
        else if (!strncmp(argv[i], "--useDLACore=", 13))
        {
            args.useDLACore = std::stoi(argv[i] + 13);
        }
        else if (!strncmp(argv[i], "--data=", 7))
        {
            std::string dirPath = (argv[i] + 7);
            if (dirPath.back() != '/')
            {
                dirPath.push_back('/');
            }
            args.dataDirs.push_back(dirPath);
        }
        else if (!strncmp(argv[i], "--timingCacheFile=", 18))
        {
            args.timingCacheFile = (argv[i] + 18);
        }
        else if (!strncmp(argv[i], "--verbose", 9) || !strncmp(argv[i], "-v", 2))
        {
            args.verbose = true;
        }
        else if (!strncmp(argv[i], "--help", 6) || !strncmp(argv[i], "-h", 2))
        {
            args.help = true;
        }
        else
        {
            sample::gLogError << "Invalid Argument: " << argv[i] << std::endl;
            return false;
        }
    }
    return true;
}
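
// Example invocation, assuming the default data files have been generated per README.md:
//   ./sample_int8_api --model=resnet50.onnx --ranges=resnet50_per_tensor_dynamic_range.txt \
//       --image=airliner.ppm --reference=reference_labels.txt --data=data/samples/int8_api/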

void validateInputParams(SampleINT8APIParams& params)
{
    sample::gLogInfo << "Please follow README.md to generate any missing input files." << std::endl;
    sample::gLogInfo << "Validating input parameters. Using the following input files for inference:" << std::endl;
    params.modelFileName = samplesCommon::locateFile(params.modelFileName, params.dataDirs);
    sample::gLogInfo << "    Model File: " << params.modelFileName << std::endl;
    if (params.writeNetworkTensors)
    {
        sample::gLogInfo << "    Writing Network Tensors File to: " << params.networkTensorsFileName << std::endl;
        return;
    }
    params.imageFileName = samplesCommon::locateFile(params.imageFileName, params.dataDirs);
    sample::gLogInfo << "    Image File: " << params.imageFileName << std::endl;
    params.referenceFileName = samplesCommon::locateFile(params.referenceFileName, params.dataDirs);
    sample::gLogInfo << "    Reference File: " << params.referenceFileName << std::endl;
    params.dynamicRangeFileName = samplesCommon::locateFile(params.dynamicRangeFileName, params.dataDirs);
    sample::gLogInfo << "    Dynamic Range File: " << params.dynamicRangeFileName << std::endl;
}

//!
//! \brief This function initializes members of the params struct using the command line args
//!
SampleINT8APIParams initializeSampleParams(SampleINT8APIArgs args)
{
    SampleINT8APIParams params;
    if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths
    {
        params.dataDirs.push_back("data/samples/int8_api/");
        params.dataDirs.push_back("data/int8_api/");
    }
    else // Use the data directory provided by the user
    {
        params.dataDirs = args.dataDirs;
    }

    params.dataDirs.push_back(""); // In case of absolute path search
    params.verbose = args.verbose;
    params.modelFileName = args.modelFileName;
    params.imageFileName = args.imageFileName;
    params.referenceFileName = args.referenceFileName;
    params.dynamicRangeFileName = args.dynamicRangeFileName;
    params.dlaCore = args.useDLACore;
    params.writeNetworkTensors = args.writeNetworkTensors;
    params.networkTensorsFileName = args.networkTensorsFileName;
    params.timingCacheFile = args.timingCacheFile;

    validateInputParams(params);
    return params;
}

//!
//! \brief This function prints the help information for running this sample
//!
void printHelpInfo()
{
    std::cout << "Usage: ./sample_int8_api [-h or --help] [--model=model_file] "
                 "[--ranges=per_tensor_dynamic_range_file] [--image=image_file] [--reference=reference_file] "
                 "[--data=/path/to/data/dir] [--useDLACore=<int>] [-v or --verbose] "
                 "[--timingCacheFile=timing_cache_file]\n";
    std::cout << "-h or --help. Display this help information" << std::endl;
    std::cout << "--model=model_file.onnx or /absolute/path/to/model_file.onnx. Generate the model file using "
                 "README.md in case it does not exist. Defaults to resnet50.onnx"
              << std::endl;
    std::cout << "--image=image.ppm or /absolute/path/to/image.ppm. Image to infer. Defaults to airliner.ppm"
              << std::endl;
    std::cout << "--reference=reference.txt or /absolute/path/to/reference.txt. Reference labels file. Defaults to "
                 "reference_labels.txt"
              << std::endl;
    std::cout << "--ranges=ranges.txt or /absolute/path/to/ranges.txt. Specify custom per-tensor dynamic ranges for "
                 "the network. Defaults to resnet50_per_tensor_dynamic_range.txt"
              << std::endl;
    std::cout << "--write_tensors. Option to generate a file containing network tensor names. By default writes to "
                 "the network_tensors.txt file. To provide a user-defined file name, use the additional option "
                 "--network_tensors_file. See the --network_tensors_file option usage for more detail."
              << std::endl;
    std::cout << "--network_tensors_file=network_tensors.txt or /absolute/path/to/network_tensors.txt. This option "
                 "needs to be used with the --write_tensors option. Specify a file name (written to the current "
                 "execution directory) or an absolute path to a file to which network tensor names are written. A "
                 "dynamic range corresponding to each network tensor is required to run the sample. Defaults to "
                 "network_tensors.txt"
              << std::endl;
    std::cout << "--data=/path/to/data/dir. Specify the data directory to search for the above files in case absolute "
                 "paths to the files are not provided. Defaults to data/samples/int8_api/ or data/int8_api/"
              << std::endl;
    std::cout << "--useDLACore=N. Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
                 "where n is the number of DLA engines on the platform."
              << std::endl;
    std::cout << "--timingCacheFile=functional.cache or /absolute/path/to/functional.cache. Specify the path for the "
                 "timing cache file. If it does not already exist, it will be created. Defaults to not using a timing "
                 "cache."
              << std::endl;
    std::cout << "--verbose. Outputs per-tensor dynamic range and layer precision info for the network" << std::endl;
}

int main(int argc, char** argv)
{
    SampleINT8APIArgs args;
    bool argsOK = parseSampleINT8APIArgs(args, argc, argv);
    if (!argsOK)
    {
        sample::gLogError << "Invalid arguments" << std::endl;
        printHelpInfo();
        return EXIT_FAILURE;
    }

    if (args.help)
    {
        printHelpInfo();
        return EXIT_SUCCESS;
    }

    if (args.verbose)
    {
        sample::gLogger.setReportableSeverity(nvinfer1::ILogger::Severity::kVERBOSE);
    }

    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);

    sample::gLogger.reportTestStart(sampleTest);

    SampleINT8APIParams params = initializeSampleParams(args);

    SampleINT8API sample(params);
    sample::gLogInfo << "Building and running an INT8 GPU inference engine for " << params.modelFileName << std::endl;

    auto buildStatus = sample.build();
    if (buildStatus == sample::Logger::TestResult::kWAIVED)
    {
        return sample::gLogger.reportWaive(sampleTest);
    }
    else if (buildStatus == sample::Logger::TestResult::kFAILED)
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    if (sample.infer() != sample::Logger::TestResult::kRUNNING)
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    if (sample.teardown() != sample::Logger::TestResult::kRUNNING)
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    return sample::gLogger.reportPass(sampleTest);
}