G0-VLA / g0plus_dockerfile /docker-assets /data /TensorRT-10.13.0.35 /samples /sampleNamedDimensions /sampleNamedDimensions.cpp
| /* | |
| * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| * SPDX-License-Identifier: Apache-2.0 | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| //! | |
| //! sampleNamedDimensions.cpp | |
| //! This file contains the implementation of the named dimensions sample. It creates the network using | |
| //! a synthetic ONNX model with named input dimensions. | |
| //! It can be run with the following command line: | |
| //! Command: ./sample_named_dimensions [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir] | |
| //! | |
| // Define TRT entrypoints used in common code | |
| using namespace nvinfer1; | |
| using samplesCommon::SampleUniquePtr; | |
//! Name passed to the sample logger when the test run is defined in main().
const std::string gSampleName{"TensorRT.sample_named_dimensions"};
| //! \brief The SampleNamedDimensions class implements a sample with named input dimensions | |
| //! | |
| //! \details It creates the network using an ONNX model | |
| //! | |
| class SampleNamedDimensions | |
| { | |
| public: | |
| SampleNamedDimensions(samplesCommon::OnnxSampleParams const& params) | |
| : mParams(params) | |
| , mEngine(nullptr) | |
| { | |
| } | |
| //! \brief Adds an optimization profile for dynamic shapes | |
| void setNamedDimension(int32_t dim); | |
| //! | |
| //! \brief Function builds the network engine | |
| //! | |
| bool build(); | |
| //! | |
| //! \brief Runs the TensorRT inference engine for this sample | |
| //! | |
| bool infer(); | |
| private: | |
| samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample. | |
| std::vector<nvinfer1::Dims> mInputDims; //!< The dimensions of the inputs to the network. | |
| std::vector<nvinfer1::Dims> mOutputDims; //!< The dimensions of the outputs to the network. | |
| int32_t mNamedDimension; //!< The value of the named dimension. | |
| //! Input Tensors. | |
| std::vector<float> mInput0; | |
| std::vector<float> mInput1; | |
| SampleUniquePtr<IRuntime> mRuntime{}; //!< The TensorRT Runtime used to deserialize the engine. | |
| std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network | |
| //! | |
| //! \brief Parses a synthetic ONNX model and creates a TensorRT network | |
| //! | |
| bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder, | |
| SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config, | |
| SampleUniquePtr<nvonnxparser::IParser>& parser); | |
| //! | |
| //! \brief Adds an optimization profile for dynamic shapes | |
| //! | |
| void addOptimizationProfile(SampleUniquePtr<nvinfer1::IBuilderConfig>& config, | |
| SampleUniquePtr<nvinfer1::IBuilder>& builder); | |
| //! | |
| //! \brief Reads the input and stores the result in a managed buffer | |
| //! | |
| bool processInput(samplesCommon::BufferManager const& buffers); | |
| //! | |
| //! \brief Classifies digits and verify result | |
| //! | |
| bool verifyOutput(samplesCommon::BufferManager const& buffers); | |
| }; | |
| //! | |
| //! \brief Sets the value of the named input dimension | |
| //! | |
| void SampleNamedDimensions::setNamedDimension(int32_t dim) | |
| { | |
| mNamedDimension = dim; | |
| } | |
| //! | |
| //! \brief Creates the network, configures the builder and creates the network engine | |
| //! | |
| //! \details This function creates the network definition by parsing the Onnx model and builds | |
| //! the engine that will be used to run the model (mEngine) | |
| //! | |
| //! \return true if the engine was created successfully and false otherwise | |
| //! | |
| bool SampleNamedDimensions::build() | |
| { | |
| auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); | |
| if (!builder) | |
| { | |
| return false; | |
| } | |
| auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0)); | |
| if (!network) | |
| { | |
| return false; | |
| } | |
| auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig()); | |
| if (!config) | |
| { | |
| return false; | |
| } | |
| auto parser | |
| = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger())); | |
| if (!parser) | |
| { | |
| return false; | |
| } | |
| auto constructed = constructNetwork(builder, network, config, parser); | |
| if (!constructed) | |
| { | |
| return false; | |
| } | |
| ASSERT(network->getNbInputs() == 2); | |
| mInputDims.push_back(network->getInput(0)->getDimensions()); | |
| mInputDims.push_back(network->getInput(1)->getDimensions()); | |
| ASSERT(mInputDims[0].nbDims == 2); | |
| ASSERT(mInputDims[1].nbDims == 2); | |
| ASSERT(network->getNbOutputs() == 1); | |
| mOutputDims.push_back(network->getOutput(0)->getDimensions()); | |
| ASSERT(mOutputDims[0].nbDims == 2); | |
| // CUDA stream used for profiling by the builder. | |
| auto profileStream = samplesCommon::makeCudaStream(); | |
| if (!profileStream) | |
| { | |
| return false; | |
| } | |
| config->setProfileStream(*profileStream); | |
| addOptimizationProfile(config, builder); | |
| SampleUniquePtr<nvinfer1::ITimingCache> timingCache{}; | |
| // Load timing cache | |
| if (!mParams.timingCacheFile.empty()) | |
| { | |
| timingCache | |
| = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile); | |
| } | |
| SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)}; | |
| if (!plan) | |
| { | |
| return false; | |
| } | |
| if (timingCache != nullptr && !mParams.timingCacheFile.empty()) | |
| { | |
| samplesCommon::updateTimingCacheFile( | |
| sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); | |
| } | |
| if (!mRuntime) | |
| { | |
| mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger())); | |
| } | |
| if (!mRuntime) | |
| { | |
| return false; | |
| } | |
| mEngine = std::shared_ptr<nvinfer1::ICudaEngine>( | |
| mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter()); | |
| if (!mEngine) | |
| { | |
| return false; | |
| } | |
| return true; | |
| } | |
| //! | |
| //! \brief Uses ONNX parser to create the ONNX Network and marks the output layers | |
| //! | |
| bool SampleNamedDimensions::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder, | |
| SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config, | |
| SampleUniquePtr<nvonnxparser::IParser>& parser) | |
| { | |
| auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), | |
| static_cast<int32_t>(sample::gLogger.getReportableSeverity())); | |
| if (!parsed) | |
| { | |
| return false; | |
| } | |
| return true; | |
| } | |
| //! | |
| //! \brief Adds an optimization profile for dynamic shapes | |
| //! | |
| void SampleNamedDimensions::addOptimizationProfile(SampleUniquePtr<nvinfer1::IBuilderConfig>& config, | |
| SampleUniquePtr<nvinfer1::IBuilder>& builder) | |
| { | |
| auto const input0ProfileDims = Dims2(mNamedDimension, mInputDims[0].d[1]); | |
| auto profile = builder->createOptimizationProfile(); | |
| profile->setDimensions("input0", OptProfileSelector::kMIN, input0ProfileDims); | |
| profile->setDimensions("input0", OptProfileSelector::kMAX, input0ProfileDims); | |
| profile->setDimensions("input0", OptProfileSelector::kOPT, input0ProfileDims); | |
| auto input1ProfileDims = Dims2(mNamedDimension, mInputDims[1].d[1]); | |
| profile->setDimensions("input1", OptProfileSelector::kMIN, input1ProfileDims); | |
| profile->setDimensions("input1", OptProfileSelector::kMAX, input1ProfileDims); | |
| profile->setDimensions("input1", OptProfileSelector::kOPT, input1ProfileDims); | |
| config->addOptimizationProfile(profile); | |
| } | |
| //! | |
| //! \brief Runs the TensorRT inference engine for this sample | |
| //! | |
| //! \details This function is the main execution function of the sample. It allocates the buffer, | |
| //! sets inputs and executes the engine. | |
| //! | |
| bool SampleNamedDimensions::infer() | |
| { | |
| // Create RAII buffer manager object | |
| samplesCommon::BufferManager buffers(mEngine); | |
| auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext()); | |
| if (!context) | |
| { | |
| return false; | |
| } | |
| for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++) | |
| { | |
| auto const name = mEngine->getIOTensorName(i); | |
| context->setTensorAddress(name, buffers.getDeviceBuffer(name)); | |
| } | |
| // Read the input data into the managed buffers | |
| ASSERT(mParams.inputTensorNames.size() == 2); | |
| if (!processInput(buffers)) | |
| { | |
| return false; | |
| } | |
| // Memcpy from host input buffers to device input buffers | |
| buffers.copyInputToDevice(); | |
| bool status = context->executeV2(buffers.getDeviceBindings().data()); | |
| if (!status) | |
| { | |
| return false; | |
| } | |
| // Memcpy from device output buffers to host output buffers | |
| buffers.copyOutputToHost(); | |
| // Verify results | |
| if (!verifyOutput(buffers)) | |
| { | |
| return false; | |
| } | |
| return true; | |
| } | |
| //! | |
| //! \brief Reads the input and stores the result in a managed buffer | |
| //! | |
| bool SampleNamedDimensions::processInput(samplesCommon::BufferManager const& buffers) | |
| { | |
| int32_t const input0H = mNamedDimension; | |
| int32_t const input0W = mInputDims[0].d[1]; | |
| int32_t const input1H = mNamedDimension; | |
| int32_t const input1W = mInputDims[1].d[1]; | |
| // Generate random input | |
| mInput0.resize(input0H * input0W); | |
| mInput1.resize(input1H * input1W); | |
| std::default_random_engine generator(static_cast<uint32_t>(time(nullptr))); | |
| std::uniform_real_distribution<float> unif_real_distr(-10., 10.); | |
| sample::gLogInfo << "Input0:\n"; | |
| for (int32_t i = 0; i < input0H * input0W; i++) | |
| { | |
| mInput0[i] = unif_real_distr(generator); | |
| sample::gLogInfo << mInput0[i] << (((i + 1) % input0W) ? " " : "\n"); | |
| } | |
| sample::gLogInfo << std::endl; | |
| sample::gLogInfo << "Input1:\n"; | |
| for (int32_t i = 0; i < input1H * input1W; i++) | |
| { | |
| mInput1[i] = unif_real_distr(generator); | |
| sample::gLogInfo << mInput1[i] << (((i + 1) % input1W) ? " " : "\n"); | |
| } | |
| sample::gLogInfo << std::endl; | |
| auto* hostInput0Buffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0])); | |
| std::copy(mInput0.begin(), mInput0.begin() + input0H * input0W, hostInput0Buffer); | |
| auto* hostInput1Buffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[1])); | |
| std::copy(mInput1.begin(), mInput1.begin() + input1H * input1W, hostInput1Buffer); | |
| return true; | |
| } | |
| //! | |
| //! \brief Verify the result of concatenation | |
| //! | |
| //! \return whether the concatenated tesnor matches reference | |
| //! | |
| bool SampleNamedDimensions::verifyOutput(samplesCommon::BufferManager const& buffers) | |
| { | |
| int32_t const outputH = 2 * mNamedDimension; | |
| int32_t const outputW = mOutputDims[0].d[1]; | |
| int32_t const outputSize = outputH * outputW; | |
| auto* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0])); | |
| sample::gLogInfo << "Output:\n"; | |
| for (int32_t i = 0; i < outputSize; i++) | |
| { | |
| sample::gLogInfo << output[i] << (((i + 1) % outputW) ? " " : "\n"); | |
| } | |
| sample::gLogInfo << std::endl; | |
| mInput0.insert(mInput0.end(), mInput1.begin(), mInput1.end()); | |
| for (int32_t i = 0; i < outputH * outputW; i++) | |
| { | |
| auto const reference_value = i > outputSize / 2 ? mInput1[i - outputSize / 2] : mInput0[i]; | |
| if (fabs(output[i] - reference_value) > std::numeric_limits<float>::epsilon()) | |
| { | |
| return false; | |
| } | |
| } | |
| return true; | |
| } | |
| //! | |
| //! \brief Initializes members of the params struct using the command line args | |
| //! | |
| samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const& args) | |
| { | |
| samplesCommon::OnnxSampleParams params; | |
| if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths | |
| { | |
| params.dataDirs.push_back("trt/samples/sampleNamedDimensions/"); | |
| } | |
| else // Use the data directory provided by the user | |
| { | |
| params.dataDirs = args.dataDirs; | |
| } | |
| params.onnxFileName = "concat_layer.onnx"; | |
| params.inputTensorNames.push_back("input0"); | |
| params.inputTensorNames.push_back("input1"); | |
| params.outputTensorNames.push_back("output"); | |
| params.timingCacheFile = params.timingCacheFile; | |
| return params; | |
| } | |
//!
//! \brief Prints the help information for running this sample
//!
//! \details Writes the usage line followed by one line per option to std::cout.
//!
void printHelpInfo()
{
    static char const* const kHelpLines[] = {
        "Usage: ./sample_named_dimensions [-h or --help] [-d or --datadir=<path to data directory>] "
        "[--timingCacheFile=<path to timing cache file>]",
        "--help Display help information",
        "--datadir Specify path to a data directory, overriding the default. This option can be used "
        "multiple times to add multiple directories. If no data directories are given, the default is to use "
        "(trt/samples/sampleNamedDimensions)",
        "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be "
        "created.",
    };

    for (char const* helpLine : kHelpLines)
    {
        std::cout << helpLine << std::endl;
    }
}
| int32_t main(int32_t argc, char** argv) | |
| { | |
| samplesCommon::Args args; | |
| bool argsOK = samplesCommon::parseArgs(args, argc, argv); | |
| if (!argsOK) | |
| { | |
| sample::gLogError << "Invalid arguments" << std::endl; | |
| printHelpInfo(); | |
| return EXIT_FAILURE; | |
| } | |
| if (args.help) | |
| { | |
| printHelpInfo(); | |
| return EXIT_SUCCESS; | |
| } | |
| auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv); | |
| sample::gLogger.reportTestStart(sampleTest); | |
| SampleNamedDimensions sample(initializeSampleParams(args)); | |
| sample::gLogInfo << "Building and running a GPU inference engine for synthetic ONNX model" << std::endl; | |
| sample.setNamedDimension(2); | |
| if (!sample.build()) | |
| { | |
| return sample::gLogger.reportFail(sampleTest); | |
| } | |
| if (!sample.infer()) | |
| { | |
| return sample::gLogger.reportFail(sampleTest); | |
| } | |
| return sample::gLogger.reportPass(sampleTest); | |
| } | |