1231: g0plus dockerfile

38fb1f6 verified about 2 months ago

23.3 kB

	/*
	* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	* SPDX-License-Identifier: Apache-2.0
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	//!
	//! sampleDynamicReshape.cpp
	//! This file contains the implementation of the dynamic reshape MNIST sample. It creates a network
	//! using the MNIST ONNX model, and uses a second engine to resize inputs to the shape the model
	//! expects.
	//! It can be run with the following command:
	//! Command: ./sample_dynamic_reshape [-h or --help [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
	//!

	// Define TRT entrypoints used in common code
	#define DEFINE_TRT_ENTRYPOINTS 1
	#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0

	#include "BatchStream.h"
	#include "EntropyCalibrator.h"
	#include "argsParser.h"
	#include "buffers.h"
	#include "common.h"
	#include "logger.h"
	#include "parserOnnxConfig.h"

	#include "NvInfer.h"
	#include <cuda_runtime_api.h>
	#include <random>
	using namespace nvinfer1;
	using samplesCommon::SampleUniquePtr;

	const std::string gSampleName = "TensorRT.sample_dynamic_reshape";

	//! \brief The SampleDynamicReshape class implementes the dynamic reshape sample.
	//!
	//! \details This class builds one engine that resizes a given input to the correct size, and a
	//! second engine based on an ONNX MNIST model that generates a prediction.
	//!
	class SampleDynamicReshape
	{
	public:
	SampleDynamicReshape(const samplesCommon::OnnxSampleParams& params)
	: mParams(params)
	{
	}

	//!
	//! \brief Builds both engines.
	//!
	bool build();

	//!
	//! \brief Prepares the model for inference by creating execution contexts and allocating buffers.
	//!
	bool prepare();

	//!
	//! \brief Runs inference using TensorRT on a random image.
	//!
	bool infer();

	private:
	bool buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
	const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream);
	bool buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
	const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream);

	Dims loadPGMFile(const std::string& fileName);
	bool validateOutput(int digit);

	samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.

	nvinfer1::Dims mPredictionInputDims; //!< The dimensions of the input of the MNIST model.
	nvinfer1::Dims mPredictionOutputDims; //!< The dimensions of the output of the MNIST model.

	SampleUniquePtr<nvinfer1::IRuntime> mRuntime{nullptr};

	// Engine plan files used for inference. One for resizing inputs, another for prediction.
	SampleUniquePtr<nvinfer1::ICudaEngine> mPreprocessorEngine{nullptr}, mPredictionEngine{nullptr};

	SampleUniquePtr<nvinfer1::IExecutionContext> mPreprocessorContext{nullptr}, mPredictionContext{nullptr};

	samplesCommon::ManagedBuffer mInput{}; //!< Host and device buffers for the input.
	samplesCommon::DeviceBuffer mPredictionInput{}; //!< Device buffer for the output of the preprocessor, i.e. the
	//!< input to the prediction model.
	samplesCommon::ManagedBuffer mOutput{}; //!< Host buffer for the ouptut

	template <typename T>
	SampleUniquePtr<T> makeUnique(T* t)
	{
	return SampleUniquePtr<T>{t};
	}
	};

	//!
	//! \brief Builds the two engines required for inference.
	//!
	//! \details This function creates one TensorRT engine for resizing inputs to the correct sizes,
	//! then creates a TensorRT network by parsing the ONNX model and builds
	//! an engine that will be used to run inference (mPredictionEngine).
	//!
	//! \return false if error in build preprocessor or predict engine.
	//!
	bool SampleDynamicReshape::build()
	{
	auto builder = makeUnique(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
	if (!builder)
	{
	sample::gLogError << "Create inference builder failed." << std::endl;
	return false;
	}

	mRuntime = makeUnique(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
	if (!mRuntime)
	{
	sample::gLogError << "Runtime object creation failed." << std::endl;
	return false;
	}

	// This function will also set mPredictionInputDims and mPredictionOutputDims,
	// so it needs to be called before building the preprocessor.
	try
	{
	// CUDA stream used for profiling by the builder.
	auto profileStream = samplesCommon::makeCudaStream();
	if (!profileStream)
	{
	return false;
	}

	bool result = buildPredictionEngine(builder, mRuntime, *profileStream)
	&& buildPreprocessorEngine(builder, mRuntime, *profileStream);
	return result;
	}
	catch (std::runtime_error& e)
	{
	sample::gLogError << e.what() << std::endl;
	return false;
	}
	}

	//!
	//! \brief Builds an engine for preprocessing (mPreprocessorEngine).
	//!
	//! \return false if error in build preprocessor engine.
	//!
	bool SampleDynamicReshape::buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
	const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream)
	{
	// Create the preprocessor engine using a network that supports full dimensions (createNetworkV2).
	auto preprocessorNetwork = makeUnique(builder->createNetworkV2(0));
	if (!preprocessorNetwork)
	{
	sample::gLogError << "Create network failed." << std::endl;
	return false;
	}

	// Reshape a dynamically shaped input to the size expected by the model, (1, 1, 28, 28).
	auto input = preprocessorNetwork->addInput("input", nvinfer1::DataType::kFLOAT, Dims4{-1, 1, -1, -1});
	auto resizeLayer = preprocessorNetwork->addResize(*input);
	resizeLayer->setOutputDimensions(mPredictionInputDims);
	preprocessorNetwork->markOutput(*resizeLayer->getOutput(0));

	// Finally, configure and build the preprocessor engine.
	auto preprocessorConfig = makeUnique(builder->createBuilderConfig());
	if (!preprocessorConfig)
	{
	sample::gLogError << "Create builder config failed." << std::endl;
	return false;
	}

	// Create an optimization profile so that we can specify a range of input dimensions.
	auto profile = builder->createOptimizationProfile();
	// This profile will be valid for all images whose size falls in the range of [(1, 1, 1, 1), (1, 1, 56, 56)]
	// but TensorRT will optimize for (1, 1, 28, 28)
	// We do not need to check the return of setDimension and addOptimizationProfile here as all dims are explicitly set
	profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, 1, 1, 1});
	profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{1, 1, 28, 28});
	profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{1, 1, 56, 56});
	preprocessorConfig->addOptimizationProfile(profile);

	// Create a calibration profile.
	auto profileCalib = builder->createOptimizationProfile();
	const int calibBatchSize{256};
	// We do not need to check the return of setDimension and setCalibrationProfile here as all dims are explicitly set
	profileCalib->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
	profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
	profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
	preprocessorConfig->setCalibrationProfile(profileCalib);
	preprocessorConfig->setProfileStream(profileStream);

	std::unique_ptr<IInt8Calibrator> calibrator;
	if (mParams.int8)
	{
	preprocessorConfig->setFlag(BuilderFlag::kINT8);
	const int nCalibBatches{10};
	MNISTBatchStream calibrationStream(
	calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
	calibrator.reset(
	new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPreprocessor", "input"));
	preprocessorConfig->setInt8Calibrator(calibrator.get());
	}

	SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};

	// Load timing cache
	if (!mParams.timingCacheFile.empty())
	{
	timingCache = samplesCommon::buildTimingCacheFromFile(
	sample::gLogger.getTRTLogger(), *preprocessorConfig, mParams.timingCacheFile);
	}

	SampleUniquePtr<nvinfer1::IHostMemory> preprocessorPlan
	= makeUnique(builder->buildSerializedNetwork(preprocessorNetwork, preprocessorConfig));
	if (!preprocessorPlan)
	{
	sample::gLogError << "Preprocessor serialized engine build failed." << std::endl;
	return false;
	}

	if (timingCache != nullptr && !mParams.timingCacheFile.empty())
	{
	samplesCommon::updateTimingCacheFile(
	sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
	}

	mPreprocessorEngine
	= makeUnique(runtime->deserializeCudaEngine(preprocessorPlan->data(), preprocessorPlan->size()));
	if (!mPreprocessorEngine)
	{
	sample::gLogError << "Preprocessor engine deserialization failed." << std::endl;
	return false;
	}

	auto const tensorName = mPreprocessorEngine->getIOTensorName(0);

	sample::gLogInfo << "Profile dimensions in preprocessor engine:" << std::endl;
	sample::gLogInfo << " Minimum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kMIN)
	<< std::endl;
	sample::gLogInfo << " Optimum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kOPT)
	<< std::endl;
	sample::gLogInfo << " Maximum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kMAX)
	<< std::endl;


	return true;
	}

	//!
	//! \brief Builds an engine for prediction (mPredictionEngine).
	//!
	//! \details This function builds an engine for the MNIST model, and updates mPredictionInputDims and
	//! mPredictionOutputDims according to the dimensions specified by the model. The preprocessor reshapes inputs to
	//! mPredictionInputDims.
	//!
	//! \return false if error in build prediction engine.
	//!
	bool SampleDynamicReshape::buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
	const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream)
	{
	// Create a network using the parser.
	auto network = makeUnique(builder->createNetworkV2(0));
	if (!network)
	{
	sample::gLogError << "Create network failed." << std::endl;
	return false;
	}

	auto parser = samplesCommon::infer_object(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
	bool parsingSuccess
	= parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(),
	static_cast<int>(sample::gLogger.getReportableSeverity()));
	if (!parsingSuccess)
	{
	sample::gLogError << "Failed to parse model." << std::endl;
	return false;
	}

	// Attach a softmax layer to the end of the network.
	auto softmax = network->addSoftMax(*network->getOutput(0));
	// Set softmax axis to 1 since network output has shape [1, 10] in full dims mode
	softmax->setAxes(1 << 1);
	network->unmarkOutput(*network->getOutput(0));
	network->markOutput(*softmax->getOutput(0));

	// Get information about the inputs/outputs directly from the model.
	mPredictionInputDims = network->getInput(0)->getDimensions();
	mPredictionOutputDims = network->getOutput(0)->getDimensions();

	// Create a builder config
	auto config = makeUnique(builder->createBuilderConfig());
	if (!config)
	{
	sample::gLogError << "Create builder config failed." << std::endl;
	return false;
	}
	if (mParams.fp16)
	{
	config->setFlag(BuilderFlag::kFP16);
	}
	if (mParams.bf16)
	{
	config->setFlag(BuilderFlag::kBF16);
	}
	config->setProfileStream(profileStream);

	auto profileCalib = builder->createOptimizationProfile();
	const auto inputName = mParams.inputTensorNames[0].c_str();
	const int calibBatchSize{1};
	// We do not need to check the return of setDimension and setCalibrationProfile here as all dims are explicitly set
	profileCalib->setDimensions(inputName, OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
	profileCalib->setDimensions(inputName, OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
	profileCalib->setDimensions(inputName, OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
	config->setCalibrationProfile(profileCalib);

	std::unique_ptr<IInt8Calibrator> calibrator;
	if (mParams.int8)
	{
	config->setFlag(BuilderFlag::kINT8);
	int nCalibBatches{10};
	MNISTBatchStream calibrationStream(
	calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
	calibrator.reset(
	new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPrediction", inputName));
	config->setInt8Calibrator(calibrator.get());
	}
	// Build the prediciton engine.
	SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};

	// Load timing cache
	if (!mParams.timingCacheFile.empty())
	{
	timingCache
	= samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
	}

	// Build the prediction engine.
	SampleUniquePtr<nvinfer1::IHostMemory> predictionPlan
	= makeUnique(builder->buildSerializedNetwork(network, config));
	if (!predictionPlan)
	{
	sample::gLogError << "Prediction serialized engine build failed." << std::endl;
	return false;
	}

	if (timingCache != nullptr && !mParams.timingCacheFile.empty())
	{
	samplesCommon::updateTimingCacheFile(
	sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
	}

	mPredictionEngine = makeUnique(runtime->deserializeCudaEngine(predictionPlan->data(), predictionPlan->size()));
	if (!mPredictionEngine)
	{
	sample::gLogError << "Prediction engine deserialization failed." << std::endl;
	return false;
	}

	return true;
	}

	//!
	//! \brief Prepares the model for inference by creating an execution context and allocating buffers.
	//!
	//! \details This function sets up the sample for inference. This involves allocating buffers for the inputs and
	//! outputs, as well as creating TensorRT execution contexts for both engines. This only needs to be called a single
	//! time.
	//!
	//! \return false if error in build preprocessor or predict context.
	//!
	bool SampleDynamicReshape::prepare()
	{
	mPreprocessorContext = makeUnique(mPreprocessorEngine->createExecutionContext());
	if (!mPreprocessorContext)
	{
	sample::gLogError << "Preprocessor context build failed." << std::endl;
	return false;
	}


	mPredictionContext = makeUnique(mPredictionEngine->createExecutionContext());
	if (!mPredictionContext)
	{
	sample::gLogError << "Prediction context build failed." << std::endl;
	return false;
	}

	// Since input dimensions are not known ahead of time, we only allocate the output buffer and preprocessor output
	// buffer.
	mPredictionInput.resize(mPredictionInputDims);
	mOutput.hostBuffer.resize(mPredictionOutputDims);
	mOutput.deviceBuffer.resize(mPredictionOutputDims);
	return true;
	}

	//!
	//! \brief Runs inference for this sample
	//!
	//! \details This function is the main execution function of the sample.
	//! It runs inference for using a random image from the MNIST dataset as an input.
	//!
	bool SampleDynamicReshape::infer()
	{
	// Load a random PGM file into a host buffer, then copy to device.
	std::random_device rd{};
	std::default_random_engine generator{rd()};
	std::uniform_int_distribution<int> digitDistribution{0, 9};
	int digit = digitDistribution(generator);

	Dims inputDims = loadPGMFile(samplesCommon::locateFile(std::to_string(digit) + ".pgm", mParams.dataDirs));
	mInput.deviceBuffer.resize(inputDims);
	CHECK(cudaMemcpy(
	mInput.deviceBuffer.data(), mInput.hostBuffer.data(), mInput.hostBuffer.nbBytes(), cudaMemcpyHostToDevice));

	// Set the input size for the preprocessor
	CHECK_RETURN_W_MSG(mPreprocessorContext->setInputShape("input", inputDims), false, "Invalid binding dimensions.");

	// We can only run inference once all dynamic input shapes have been specified.
	if (!mPreprocessorContext->allInputDimensionsSpecified())
	{
	return false;
	}

	// Run the preprocessor to resize the input to the correct shape
	std::vector<void*> preprocessorBindings = {mInput.deviceBuffer.data(), mPredictionInput.data()};
	// For engines using full dims, we can use executeV2, which does not include a separate batch size parameter.
	bool status = mPreprocessorContext->executeV2(preprocessorBindings.data());
	if (!status)
	{
	return false;
	}

	// Next, run the model to generate a prediction.
	std::vector<void*> predicitonBindings = {mPredictionInput.data(), mOutput.deviceBuffer.data()};
	status = mPredictionContext->executeV2(predicitonBindings.data());
	if (!status)
	{
	return false;
	}

	// Copy the outputs back to the host and verify the output.
	CHECK(cudaMemcpy(mOutput.hostBuffer.data(), mOutput.deviceBuffer.data(), mOutput.deviceBuffer.nbBytes(),
	cudaMemcpyDeviceToHost));
	return validateOutput(digit);
	}

	//!
	//! \brief Loads a PGM file into mInput and returns the dimensions of the loaded image.
	//!
	//! \details This function loads the specified PGM file into the input host buffer.
	//!
	Dims SampleDynamicReshape::loadPGMFile(const std::string& fileName)
	{
	std::ifstream infile(fileName, std::ifstream::binary);
	ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");

	std::string magic;
	int h, w, max;
	infile >> magic >> h >> w >> max;

	infile.seekg(1, infile.cur);
	Dims4 inputDims{1, 1, h, w};
	size_t vol = samplesCommon::volume(inputDims);
	std::vector<uint8_t> fileData(vol);
	infile.read(reinterpret_cast<char*>(fileData.data()), vol);

	// Print an ascii representation
	sample::gLogInfo << "Input:\n";
	for (size_t i = 0; i < vol; i++)
	{
	sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % w) ? "" : "\n");
	}
	sample::gLogInfo << std::endl;

	// Normalize and copy to the host buffer.
	mInput.hostBuffer.resize(inputDims);
	float* hostDataBuffer = static_cast<float*>(mInput.hostBuffer.data());
	std::transform(fileData.begin(), fileData.end(), hostDataBuffer,
	[](uint8_t x) { return 1.0 - static_cast<float>(x / 255.0); });
	return inputDims;
	}

	//!
	//! \brief Checks whether the model prediction (in mOutput) is correct.
	//!
	bool SampleDynamicReshape::validateOutput(int digit)
	{
	const float* bufRaw = static_cast<const float*>(mOutput.hostBuffer.data());
	std::vector<float> prob(bufRaw, bufRaw + mOutput.hostBuffer.size());

	int curIndex{0};
	for (const auto& elem : prob)
	{
	sample::gLogInfo << " Prob " << curIndex << " " << std::fixed << std::setw(5) << std::setprecision(4) << elem
	<< " "
	<< "Class " << curIndex << ": " << std::string(int(std::floor(elem * 10 + 0.5F)), '*')
	<< std::endl;
	++curIndex;
	}

	int predictedDigit = std::max_element(prob.begin(), prob.end()) - prob.begin();
	return digit == predictedDigit;
	}

	//!
	//! \brief Initializes members of the params struct using the command line args
	//!
	samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args& args)
	{
	samplesCommon::OnnxSampleParams params;
	if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths
	{
	params.dataDirs.push_back("data/mnist/");
	params.dataDirs.push_back("data/samples/mnist/");
	}
	else // Use the data directory provided by the user
	{
	params.dataDirs = args.dataDirs;
	}
	params.onnxFileName = "mnist.onnx";
	params.inputTensorNames.push_back("Input3");
	params.outputTensorNames.push_back("Plus214_Output_0");
	params.int8 = args.runInInt8;
	params.fp16 = args.runInFp16;
	params.bf16 = args.runInBf16;
	params.timingCacheFile = args.timingCacheFile;
	return params;
	}

	//!
	//! \brief Prints the help information for running this sample
	//!
	void printHelpInfo()
	{
	std::cout << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=<path to data directory>] "
	"[--timingCacheFile=<path to timing cache file>]"
	<< std::endl;
	std::cout << "--help, -h Display help information" << std::endl;
	std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
	"multiple times to add multiple directories. If no data directories are given, the default is to use "
	"(data/samples/mnist/, data/mnist/)"
	<< std::endl;
	std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be "
	<< "created." << std::endl;
	std::cout << "--int8 Run in Int8 mode." << std::endl;
	std::cout << "--fp16 Run in FP16 mode." << std::endl;
	std::cout << "--bf16 Run in BF16 mode." << std::endl;
	}

	int main(int argc, char** argv)
	{
	samplesCommon::Args args;
	bool argsOK = samplesCommon::parseArgs(args, argc, argv);
	if (!argsOK)
	{
	sample::gLogError << "Invalid arguments" << std::endl;
	printHelpInfo();
	return EXIT_FAILURE;
	}
	if (args.help)
	{
	printHelpInfo();
	return EXIT_SUCCESS;
	}

	auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);

	sample::gLogger.reportTestStart(sampleTest);

	SampleDynamicReshape sample{initializeSampleParams(args)};

	if (!sample.build())
	{
	return sample::gLogger.reportFail(sampleTest);
	}
	if (!sample.prepare())
	{
	return sample::gLogger.reportFail(sampleTest);
	}
	if (!sample.infer())
	{
	return sample::gLogger.reportFail(sampleTest);
	}
	return sample::gLogger.reportPass(sampleTest);
	}