/* * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //! //! sampleDynamicReshape.cpp //! This file contains the implementation of the dynamic reshape MNIST sample. It creates a network //! using the MNIST ONNX model, and uses a second engine to resize inputs to the shape the model //! expects. //! It can be run with the following command: //! Command: ./sample_dynamic_reshape [-h or --help [-d=/path/to/data/dir or --datadir=/path/to/data/dir] //! // Define TRT entrypoints used in common code #define DEFINE_TRT_ENTRYPOINTS 1 #define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0 #include "BatchStream.h" #include "EntropyCalibrator.h" #include "argsParser.h" #include "buffers.h" #include "common.h" #include "logger.h" #include "parserOnnxConfig.h" #include "NvInfer.h" #include #include using namespace nvinfer1; using samplesCommon::SampleUniquePtr; const std::string gSampleName = "TensorRT.sample_dynamic_reshape"; //! \brief The SampleDynamicReshape class implementes the dynamic reshape sample. //! //! \details This class builds one engine that resizes a given input to the correct size, and a //! second engine based on an ONNX MNIST model that generates a prediction. //! class SampleDynamicReshape { public: SampleDynamicReshape(const samplesCommon::OnnxSampleParams& params) : mParams(params) { } //! //! \brief Builds both engines. //! bool build(); //! //! \brief Prepares the model for inference by creating execution contexts and allocating buffers. //! bool prepare(); //! //! \brief Runs inference using TensorRT on a random image. //! bool infer(); private: bool buildPreprocessorEngine(const SampleUniquePtr& builder, const SampleUniquePtr& runtime, cudaStream_t profileStream); bool buildPredictionEngine(const SampleUniquePtr& builder, const SampleUniquePtr& runtime, cudaStream_t profileStream); Dims loadPGMFile(const std::string& fileName); bool validateOutput(int digit); samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample. nvinfer1::Dims mPredictionInputDims; //!< The dimensions of the input of the MNIST model. nvinfer1::Dims mPredictionOutputDims; //!< The dimensions of the output of the MNIST model. SampleUniquePtr mRuntime{nullptr}; // Engine plan files used for inference. One for resizing inputs, another for prediction. SampleUniquePtr mPreprocessorEngine{nullptr}, mPredictionEngine{nullptr}; SampleUniquePtr mPreprocessorContext{nullptr}, mPredictionContext{nullptr}; samplesCommon::ManagedBuffer mInput{}; //!< Host and device buffers for the input. samplesCommon::DeviceBuffer mPredictionInput{}; //!< Device buffer for the output of the preprocessor, i.e. the //!< input to the prediction model. samplesCommon::ManagedBuffer mOutput{}; //!< Host buffer for the ouptut template SampleUniquePtr makeUnique(T* t) { return SampleUniquePtr{t}; } }; //! //! \brief Builds the two engines required for inference. //! //! \details This function creates one TensorRT engine for resizing inputs to the correct sizes, //! then creates a TensorRT network by parsing the ONNX model and builds //! an engine that will be used to run inference (mPredictionEngine). //! //! \return false if error in build preprocessor or predict engine. //! bool SampleDynamicReshape::build() { auto builder = makeUnique(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); if (!builder) { sample::gLogError << "Create inference builder failed." << std::endl; return false; } mRuntime = makeUnique(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())); if (!mRuntime) { sample::gLogError << "Runtime object creation failed." << std::endl; return false; } // This function will also set mPredictionInputDims and mPredictionOutputDims, // so it needs to be called before building the preprocessor. try { // CUDA stream used for profiling by the builder. auto profileStream = samplesCommon::makeCudaStream(); if (!profileStream) { return false; } bool result = buildPredictionEngine(builder, mRuntime, *profileStream) && buildPreprocessorEngine(builder, mRuntime, *profileStream); return result; } catch (std::runtime_error& e) { sample::gLogError << e.what() << std::endl; return false; } } //! //! \brief Builds an engine for preprocessing (mPreprocessorEngine). //! //! \return false if error in build preprocessor engine. //! bool SampleDynamicReshape::buildPreprocessorEngine(const SampleUniquePtr& builder, const SampleUniquePtr& runtime, cudaStream_t profileStream) { // Create the preprocessor engine using a network that supports full dimensions (createNetworkV2). auto preprocessorNetwork = makeUnique(builder->createNetworkV2(0)); if (!preprocessorNetwork) { sample::gLogError << "Create network failed." << std::endl; return false; } // Reshape a dynamically shaped input to the size expected by the model, (1, 1, 28, 28). auto input = preprocessorNetwork->addInput("input", nvinfer1::DataType::kFLOAT, Dims4{-1, 1, -1, -1}); auto resizeLayer = preprocessorNetwork->addResize(*input); resizeLayer->setOutputDimensions(mPredictionInputDims); preprocessorNetwork->markOutput(*resizeLayer->getOutput(0)); // Finally, configure and build the preprocessor engine. auto preprocessorConfig = makeUnique(builder->createBuilderConfig()); if (!preprocessorConfig) { sample::gLogError << "Create builder config failed." << std::endl; return false; } // Create an optimization profile so that we can specify a range of input dimensions. auto profile = builder->createOptimizationProfile(); // This profile will be valid for all images whose size falls in the range of [(1, 1, 1, 1), (1, 1, 56, 56)] // but TensorRT will optimize for (1, 1, 28, 28) // We do not need to check the return of setDimension and addOptimizationProfile here as all dims are explicitly set profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, 1, 1, 1}); profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{1, 1, 28, 28}); profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{1, 1, 56, 56}); preprocessorConfig->addOptimizationProfile(profile); // Create a calibration profile. auto profileCalib = builder->createOptimizationProfile(); const int calibBatchSize{256}; // We do not need to check the return of setDimension and setCalibrationProfile here as all dims are explicitly set profileCalib->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28}); profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28}); profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28}); preprocessorConfig->setCalibrationProfile(profileCalib); preprocessorConfig->setProfileStream(profileStream); std::unique_ptr calibrator; if (mParams.int8) { preprocessorConfig->setFlag(BuilderFlag::kINT8); const int nCalibBatches{10}; MNISTBatchStream calibrationStream( calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs); calibrator.reset( new Int8EntropyCalibrator2(calibrationStream, 0, "MNISTPreprocessor", "input")); preprocessorConfig->setInt8Calibrator(calibrator.get()); } SampleUniquePtr timingCache{}; // Load timing cache if (!mParams.timingCacheFile.empty()) { timingCache = samplesCommon::buildTimingCacheFromFile( sample::gLogger.getTRTLogger(), *preprocessorConfig, mParams.timingCacheFile); } SampleUniquePtr preprocessorPlan = makeUnique(builder->buildSerializedNetwork(*preprocessorNetwork, *preprocessorConfig)); if (!preprocessorPlan) { sample::gLogError << "Preprocessor serialized engine build failed." << std::endl; return false; } if (timingCache != nullptr && !mParams.timingCacheFile.empty()) { samplesCommon::updateTimingCacheFile( sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); } mPreprocessorEngine = makeUnique(runtime->deserializeCudaEngine(preprocessorPlan->data(), preprocessorPlan->size())); if (!mPreprocessorEngine) { sample::gLogError << "Preprocessor engine deserialization failed." << std::endl; return false; } auto const tensorName = mPreprocessorEngine->getIOTensorName(0); sample::gLogInfo << "Profile dimensions in preprocessor engine:" << std::endl; sample::gLogInfo << " Minimum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kMIN) << std::endl; sample::gLogInfo << " Optimum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kOPT) << std::endl; sample::gLogInfo << " Maximum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kMAX) << std::endl; return true; } //! //! \brief Builds an engine for prediction (mPredictionEngine). //! //! \details This function builds an engine for the MNIST model, and updates mPredictionInputDims and //! mPredictionOutputDims according to the dimensions specified by the model. The preprocessor reshapes inputs to //! mPredictionInputDims. //! //! \return false if error in build prediction engine. //! bool SampleDynamicReshape::buildPredictionEngine(const SampleUniquePtr& builder, const SampleUniquePtr& runtime, cudaStream_t profileStream) { // Create a network using the parser. auto network = makeUnique(builder->createNetworkV2(0)); if (!network) { sample::gLogError << "Create network failed." << std::endl; return false; } auto parser = samplesCommon::infer_object(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger())); bool parsingSuccess = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast(sample::gLogger.getReportableSeverity())); if (!parsingSuccess) { sample::gLogError << "Failed to parse model." << std::endl; return false; } // Attach a softmax layer to the end of the network. auto softmax = network->addSoftMax(*network->getOutput(0)); // Set softmax axis to 1 since network output has shape [1, 10] in full dims mode softmax->setAxes(1 << 1); network->unmarkOutput(*network->getOutput(0)); network->markOutput(*softmax->getOutput(0)); // Get information about the inputs/outputs directly from the model. mPredictionInputDims = network->getInput(0)->getDimensions(); mPredictionOutputDims = network->getOutput(0)->getDimensions(); // Create a builder config auto config = makeUnique(builder->createBuilderConfig()); if (!config) { sample::gLogError << "Create builder config failed." << std::endl; return false; } if (mParams.fp16) { config->setFlag(BuilderFlag::kFP16); } if (mParams.bf16) { config->setFlag(BuilderFlag::kBF16); } config->setProfileStream(profileStream); auto profileCalib = builder->createOptimizationProfile(); const auto inputName = mParams.inputTensorNames[0].c_str(); const int calibBatchSize{1}; // We do not need to check the return of setDimension and setCalibrationProfile here as all dims are explicitly set profileCalib->setDimensions(inputName, OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28}); profileCalib->setDimensions(inputName, OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28}); profileCalib->setDimensions(inputName, OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28}); config->setCalibrationProfile(profileCalib); std::unique_ptr calibrator; if (mParams.int8) { config->setFlag(BuilderFlag::kINT8); int nCalibBatches{10}; MNISTBatchStream calibrationStream( calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs); calibrator.reset( new Int8EntropyCalibrator2(calibrationStream, 0, "MNISTPrediction", inputName)); config->setInt8Calibrator(calibrator.get()); } // Build the prediciton engine. SampleUniquePtr timingCache{}; // Load timing cache if (!mParams.timingCacheFile.empty()) { timingCache = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile); } // Build the prediction engine. SampleUniquePtr predictionPlan = makeUnique(builder->buildSerializedNetwork(*network, *config)); if (!predictionPlan) { sample::gLogError << "Prediction serialized engine build failed." << std::endl; return false; } if (timingCache != nullptr && !mParams.timingCacheFile.empty()) { samplesCommon::updateTimingCacheFile( sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); } mPredictionEngine = makeUnique(runtime->deserializeCudaEngine(predictionPlan->data(), predictionPlan->size())); if (!mPredictionEngine) { sample::gLogError << "Prediction engine deserialization failed." << std::endl; return false; } return true; } //! //! \brief Prepares the model for inference by creating an execution context and allocating buffers. //! //! \details This function sets up the sample for inference. This involves allocating buffers for the inputs and //! outputs, as well as creating TensorRT execution contexts for both engines. This only needs to be called a single //! time. //! //! \return false if error in build preprocessor or predict context. //! bool SampleDynamicReshape::prepare() { mPreprocessorContext = makeUnique(mPreprocessorEngine->createExecutionContext()); if (!mPreprocessorContext) { sample::gLogError << "Preprocessor context build failed." << std::endl; return false; } mPredictionContext = makeUnique(mPredictionEngine->createExecutionContext()); if (!mPredictionContext) { sample::gLogError << "Prediction context build failed." << std::endl; return false; } // Since input dimensions are not known ahead of time, we only allocate the output buffer and preprocessor output // buffer. mPredictionInput.resize(mPredictionInputDims); mOutput.hostBuffer.resize(mPredictionOutputDims); mOutput.deviceBuffer.resize(mPredictionOutputDims); return true; } //! //! \brief Runs inference for this sample //! //! \details This function is the main execution function of the sample. //! It runs inference for using a random image from the MNIST dataset as an input. //! bool SampleDynamicReshape::infer() { // Load a random PGM file into a host buffer, then copy to device. std::random_device rd{}; std::default_random_engine generator{rd()}; std::uniform_int_distribution digitDistribution{0, 9}; int digit = digitDistribution(generator); Dims inputDims = loadPGMFile(samplesCommon::locateFile(std::to_string(digit) + ".pgm", mParams.dataDirs)); mInput.deviceBuffer.resize(inputDims); CHECK(cudaMemcpy( mInput.deviceBuffer.data(), mInput.hostBuffer.data(), mInput.hostBuffer.nbBytes(), cudaMemcpyHostToDevice)); // Set the input size for the preprocessor CHECK_RETURN_W_MSG(mPreprocessorContext->setInputShape("input", inputDims), false, "Invalid binding dimensions."); // We can only run inference once all dynamic input shapes have been specified. if (!mPreprocessorContext->allInputDimensionsSpecified()) { return false; } // Run the preprocessor to resize the input to the correct shape std::vector preprocessorBindings = {mInput.deviceBuffer.data(), mPredictionInput.data()}; // For engines using full dims, we can use executeV2, which does not include a separate batch size parameter. bool status = mPreprocessorContext->executeV2(preprocessorBindings.data()); if (!status) { return false; } // Next, run the model to generate a prediction. std::vector predicitonBindings = {mPredictionInput.data(), mOutput.deviceBuffer.data()}; status = mPredictionContext->executeV2(predicitonBindings.data()); if (!status) { return false; } // Copy the outputs back to the host and verify the output. CHECK(cudaMemcpy(mOutput.hostBuffer.data(), mOutput.deviceBuffer.data(), mOutput.deviceBuffer.nbBytes(), cudaMemcpyDeviceToHost)); return validateOutput(digit); } //! //! \brief Loads a PGM file into mInput and returns the dimensions of the loaded image. //! //! \details This function loads the specified PGM file into the input host buffer. //! Dims SampleDynamicReshape::loadPGMFile(const std::string& fileName) { std::ifstream infile(fileName, std::ifstream::binary); ASSERT(infile.is_open() && "Attempting to read from a file that is not open."); std::string magic; int h, w, max; infile >> magic >> h >> w >> max; infile.seekg(1, infile.cur); Dims4 inputDims{1, 1, h, w}; size_t vol = samplesCommon::volume(inputDims); std::vector fileData(vol); infile.read(reinterpret_cast(fileData.data()), vol); // Print an ascii representation sample::gLogInfo << "Input:\n"; for (size_t i = 0; i < vol; i++) { sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % w) ? "" : "\n"); } sample::gLogInfo << std::endl; // Normalize and copy to the host buffer. mInput.hostBuffer.resize(inputDims); float* hostDataBuffer = static_cast(mInput.hostBuffer.data()); std::transform(fileData.begin(), fileData.end(), hostDataBuffer, [](uint8_t x) { return 1.0 - static_cast(x / 255.0); }); return inputDims; } //! //! \brief Checks whether the model prediction (in mOutput) is correct. //! bool SampleDynamicReshape::validateOutput(int digit) { const float* bufRaw = static_cast(mOutput.hostBuffer.data()); std::vector prob(bufRaw, bufRaw + mOutput.hostBuffer.size()); int curIndex{0}; for (const auto& elem : prob) { sample::gLogInfo << " Prob " << curIndex << " " << std::fixed << std::setw(5) << std::setprecision(4) << elem << " " << "Class " << curIndex << ": " << std::string(int(std::floor(elem * 10 + 0.5F)), '*') << std::endl; ++curIndex; } int predictedDigit = std::max_element(prob.begin(), prob.end()) - prob.begin(); return digit == predictedDigit; } //! //! \brief Initializes members of the params struct using the command line args //! samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args& args) { samplesCommon::OnnxSampleParams params; if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths { params.dataDirs.push_back("data/mnist/"); params.dataDirs.push_back("data/samples/mnist/"); } else // Use the data directory provided by the user { params.dataDirs = args.dataDirs; } params.onnxFileName = "mnist.onnx"; params.inputTensorNames.push_back("Input3"); params.outputTensorNames.push_back("Plus214_Output_0"); params.int8 = args.runInInt8; params.fp16 = args.runInFp16; params.bf16 = args.runInBf16; params.timingCacheFile = args.timingCacheFile; return params; } //! //! \brief Prints the help information for running this sample //! void printHelpInfo() { std::cout << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=] " "[--timingCacheFile=]" << std::endl; std::cout << "--help, -h Display help information" << std::endl; std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " "(data/samples/mnist/, data/mnist/)" << std::endl; std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " << "created." << std::endl; std::cout << "--int8 Run in Int8 mode." << std::endl; std::cout << "--fp16 Run in FP16 mode." << std::endl; std::cout << "--bf16 Run in BF16 mode." << std::endl; } int main(int argc, char** argv) { samplesCommon::Args args; bool argsOK = samplesCommon::parseArgs(args, argc, argv); if (!argsOK) { sample::gLogError << "Invalid arguments" << std::endl; printHelpInfo(); return EXIT_FAILURE; } if (args.help) { printHelpInfo(); return EXIT_SUCCESS; } auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv); sample::gLogger.reportTestStart(sampleTest); SampleDynamicReshape sample{initializeSampleParams(args)}; if (!sample.build()) { return sample::gLogger.reportFail(sampleTest); } if (!sample.prepare()) { return sample::gLogger.reportFail(sampleTest); } if (!sample.infer()) { return sample::gLogger.reportFail(sampleTest); } return sample::gLogger.reportPass(sampleTest); }