whitbrunn's picture
1231: g0plus dockerfile
38fb1f6 verified
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//!
//! sampleCharRNN.cpp
//! This file contains the implementation of the char_rnn sample.
//! It uses weights from a trained TensorFlow model and creates the network
//! using the TensorRT network definition API
//! It can be run with the following command line:
//! Command: ./sample_char_rnn [-h or --help] [-d or --datadir=<path to data directory>]
//!
// Define TRT entrypoints used in common code
#define DEFINE_TRT_ENTRYPOINTS 1
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <sys/stat.h>
#include <unordered_set>
#include <vector>
#include "NvInfer.h"
#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "cuda_runtime_api.h"
#include "logger.h"
#include "sampleEngines.h"
using namespace nvinfer1;
using samplesCommon::SampleUniquePtr;
//! Name under which main() registers this sample with the test logger.
const std::string gSampleName = "TensorRT.sample_char_rnn";
//! Layer indices 0..3. addLSTMLayers() uses the address of INDICES[i] as the backing storage of a scalar INT32
//! constant, and asserts that layerCount does not exceed the size of this array.
static const std::array<int, 4> INDICES{0, 1, 2, 3};
// The model used by this sample was trained using github repository:
// https://github.com/crazydonkey200/tensorflow-char-rnn
//
// The data set used: tensorflow-char-rnn/data/tiny_shakespeare.txt
//
// The command used to train:
// python train.py --data_file=data/tiny_shakespeare.txt --num_epochs=100 --num_layer=2 --hidden_size=512
// --embedding_size=512 --dropout=.5
//
// Epochs trained: 100
// Test perplexity: 4.940
//
// Layer0 and Layer1 weights matrices are added as RNNW_L0_NAME and RNNW_L1_NAME, respectively.
// Layer0 and Layer1 bias are added as RNNB_L0_NAME and RNNB_L1_NAME, respectively.
// Embedded is added as EMBED_NAME.
// fc_w is added as FCW_NAME.
// fc_b is added as FCB_NAME.
//!
//! \brief Names of the weight buffers this sample looks up in the weights file.
//!
struct SampleCharRNNWeightNames
{
// LSTM layer 0 kernel and bias.
const std::string RNNW_L0_NAME{"rnn_multi_rnn_cell_cell_0_basic_lstm_cell_kernel"};
const std::string RNNB_L0_NAME{"rnn_multi_rnn_cell_cell_0_basic_lstm_cell_bias"};
// LSTM layer 1 kernel and bias.
const std::string RNNW_L1_NAME{"rnn_multi_rnn_cell_cell_1_basic_lstm_cell_kernel"};
const std::string RNNB_L1_NAME{"rnn_multi_rnn_cell_cell_1_basic_lstm_cell_bias"};
// Weights and bias applied to the LSTM output in constructNetwork().
const std::string FCW_NAME{"softmax_softmax_w"};
const std::string FCB_NAME{"softmax_softmax_b"};
// Embedding table read by copyEmbeddingToInput().
const std::string EMBED_NAME{"embedding"};
// Names still to be extracted. loadWeights() erases a name once its buffer has been read and stops parsing
// when the set is empty. Initialized from the members above, so it must stay declared after them.
std::unordered_set<std::string> names
= {{RNNW_L0_NAME, RNNB_L0_NAME, RNNW_L1_NAME, RNNB_L1_NAME, FCW_NAME, FCB_NAME, EMBED_NAME}};
};
//!
//! \brief Names of the network input/output tensors, used both when building the network and when
//! looking up buffers at inference time.
//!
struct SampleCharRNNBindingNames
{
// Network input holding the embedding of the current character.
const char* INPUT_BLOB_NAME{"data"};
// Network inputs holding the hidden and cell state fed into the LSTM layers.
const char* HIDDEN_IN_BLOB_NAME{"hiddenIn"};
const char* CELL_IN_BLOB_NAME{"cellIn"};
// Network outputs holding the hidden and cell state after a step; copied back to the inputs above
// by copyRNNOutputsToInputs().
const char* HIDDEN_OUT_BLOB_NAME{"hiddenOut"};
const char* CELL_OUT_BLOB_NAME{"cellOut"};
// Network output holding the index selected by the TopK layer.
const char* OUTPUT_BLOB_NAME{"pred"};
// Scalar INT32 network input used as the trip count of the LSTM loop.
const char* SEQ_LEN_IN_BLOB_NAME{"seqLen"};
};
//!
//! \brief Lookup tables between characters and the 65 indices (0..64) used by the model.
//!
//! \details charToID and idToChar list the same characters in the same order, so each is the inverse of the other.
//!
struct SampleCharRNNMaps
{
// A mapping from character to index used by the tensorflow model.
const std::map<char, int> charToID{{'\n', 0}, {'!', 1}, {' ', 2}, {'$', 3}, {'\'', 4}, {'&', 5}, {'-', 6}, {',', 7},
{'.', 8}, {'3', 9}, {';', 10}, {':', 11}, {'?', 12}, {'A', 13}, {'C', 14}, {'B', 15}, {'E', 16}, {'D', 17},
{'G', 18}, {'F', 19}, {'I', 20}, {'H', 21}, {'K', 22}, {'J', 23}, {'M', 24}, {'L', 25}, {'O', 26}, {'N', 27},
{'Q', 28}, {'P', 29}, {'S', 30}, {'R', 31}, {'U', 32}, {'T', 33}, {'W', 34}, {'V', 35}, {'Y', 36}, {'X', 37},
{'Z', 38}, {'a', 39}, {'c', 40}, {'b', 41}, {'e', 42}, {'d', 43}, {'g', 44}, {'f', 45}, {'i', 46}, {'h', 47},
{'k', 48}, {'j', 49}, {'m', 50}, {'l', 51}, {'o', 52}, {'n', 53}, {'q', 54}, {'p', 55}, {'s', 56}, {'r', 57},
{'u', 58}, {'t', 59}, {'w', 60}, {'v', 61}, {'y', 62}, {'x', 63}, {'z', 64}};
// A mapping from index to character used by the tensorflow model.
const std::vector<char> idToChar{{'\n', '!', ' ', '$', '\'', '&', '-', ',', '.', '3', ';', ':', '?', 'A', 'C', 'B',
'E', 'D', 'G', 'F', 'I', 'H', 'K', 'J', 'M', 'L', 'O', 'N', 'Q', 'P', 'S', 'R', 'U', 'T', 'W', 'V', 'Y', 'X',
'Z', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i', 'h', 'k', 'j', 'm', 'l', 'o', 'n', 'q', 'p', 's', 'r', 'u', 't',
'w', 'v', 'y', 'x', 'z'}};
};
//!
//! \brief Parameters of the char_rnn sample, filled in by initializeSampleParams().
//!
//! \note The int members have no default initializers; they hold indeterminate values until assigned.
//!
struct SampleCharRNNParams : samplesCommon::SampleParams
{
// Number of stacked LSTM layers; also the first dimension of the hidden/cell state tensors.
int layerCount;
// Width of the hidden and cell state of each LSTM layer.
int hiddenSize;
// First dimension of the data input and value written to the sequence-length input.
int seqSize;
// Length of one embedding vector (second dimension of the data input).
int dataSize;
// Number of rows of the fully connected weights, i.e. number of candidate characters.
int vocabSize;
// Assigned by initializeSampleParams(); not read anywhere else in this file.
int outputSize;
// Path of the weights file parsed by loadWeights().
std::string weightFileName;
// If non-empty, build() saves the engine to this path.
std::string saveEngine;
// If non-empty, build() loads the engine from this path instead of building the network.
std::string loadEngine;
SampleCharRNNMaps charMaps;
SampleCharRNNWeightNames weightNames;
SampleCharRNNBindingNames bindingNames;
// Seed sentences and, at the same index, the text the model is expected to generate after each seed.
std::vector<std::string> inputSentences;
std::vector<std::string> outputSentences;
};
//!
//! \brief The SampleCharRNNBase class implements the char_rnn sample
//!
//! \details It uses weights from a trained TensorFlow model and creates
//! the network using the TensorRT network definition API. Derived classes
//! supply the LSTM part of the network through addLSTMLayers().
//!
class SampleCharRNNBase
{
public:
//!
//! \brief Stores a copy of the given parameters.
//!
SampleCharRNNBase(const SampleCharRNNParams& params)
: mParams(params)
{
}
virtual ~SampleCharRNNBase() = default;
//!
//! \brief Builds the network engine
//!
bool build();
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
bool infer();
//!
//! \brief Used to clean up any state created in the sample class
//!
bool teardown();
protected:
//!
//! \brief Add inputs to the TensorRT network and configure LSTM layers using network definition API.
//!
virtual nvinfer1::ILayer* addLSTMLayers(SampleUniquePtr<nvinfer1::INetworkDefinition>& network) = 0;
//!
//! \brief Converts RNN weights from TensorFlow's format to TensorRT's format.
//!
nvinfer1::Weights convertRNNWeights(nvinfer1::Weights input, int dataSize);
//!
//! \brief Converts RNN Biases from TensorFlow's format to TensorRT's format.
//!
nvinfer1::Weights convertRNNBias(nvinfer1::Weights input);
//! Weights by name: filled by build() from the weights file, extended by addLSTMLayers() with converted copies.
std::map<std::string, nvinfer1::Weights> mWeightMap;
//! Owns the host allocations that the nvinfer1::Weights values in this class point into.
std::vector<std::unique_ptr<samplesCommon::HostMemory>> weightsMemory;
//! Copy of the parameters passed to the constructor. loadWeights() erases entries from weightNames.names.
SampleCharRNNParams mParams;
//!
//! \brief Adds a shuffle layer that reshapes the given tensor to dims and returns the layer's output.
//!
nvinfer1::ITensor* addReshape(
SampleUniquePtr<nvinfer1::INetworkDefinition>& network, nvinfer1::ITensor& tensor, nvinfer1::Dims dims);
private:
//!
//! \brief Load requested weights from a formatted file into a map.
//!
std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);
//!
//! \brief Create full model using the TensorRT network definition API and build the engine.
//!
void constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config);
//!
//! \brief Looks up the embedding tensor for a given char and copies it to input buffer
//!
void copyEmbeddingToInput(samplesCommon::BufferManager& buffers, char const& c);
//!
//! \brief Perform one time step of inference with the TensorRT execution context
//!
bool stepOnce(samplesCommon::BufferManager& buffers, SampleUniquePtr<nvinfer1::IExecutionContext>& context,
cudaStream_t& stream);
//!
//! \brief Copies Ct/Ht output from the RNN to the Ct-1/Ht-1 input buffers for next time step
//!
void copyRNNOutputsToInputs(samplesCommon::BufferManager& buffers);
//!
//! \brief Transposes a sub-buffer of size height * width.
//!
bool transposeSubBuffers(void* data, int64_t height, int64_t width) noexcept;
std::shared_ptr<nvinfer1::IRuntime> mRuntime{nullptr}; //!< The TensorRT runtime used to run the network
std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
};
//!
//! \brief Implementation of the char_rnn sample that builds each LSTM layer with the TensorRT loop API.
//!
class SampleCharRNNLoop : public SampleCharRNNBase
{
public:
//!
//! \brief Tensors flowing into or out of one LSTM layer.
//!
struct LstmIO
{
nvinfer1::ITensor* data; //!< Sequence data (input of the layer, or its concatenated per-step output)
nvinfer1::ITensor* hidden; //!< Hidden state
nvinfer1::ITensor* cell; //!< Cell state
};
//!
//! \brief Constant tensors that parameterize one LSTM layer.
//!
struct LstmParams
{
nvinfer1::ITensor* inputWeights; //!< Multiplied (transposed) with the layer input
nvinfer1::ITensor* recurrentWeights; //!< Multiplied (transposed) with the hidden state
nvinfer1::ITensor* inputBias; //!< Summed with recurrentBias and added to the gate pre-activations
nvinfer1::ITensor* recurrentBias;
nvinfer1::ITensor* maxSequenceSize; //!< Second input of the concatenating loop output layer
};
//!
//! \brief Forwards the parameters to the base class.
//!
SampleCharRNNLoop(SampleCharRNNParams params)
: SampleCharRNNBase(params)
{
}
protected:
//!
//! \brief Add inputs to the TensorRT network and configure LSTM layers using network definition API.
//!
nvinfer1::ILayer* addLSTMLayers(SampleUniquePtr<nvinfer1::INetworkDefinition>& network) final;
private:
//!
//! \brief Adds one LSTM layer as a loop over the sequence.
//!
//! \details Writes the layer's output tensors to outputTensors and returns the loop output layer
//! that concatenates the hidden state of every step.
//!
nvinfer1::ILayer* addLSTMCell(SampleUniquePtr<nvinfer1::INetworkDefinition>& network, const LstmIO& inputTensors,
nvinfer1::ITensor* sequenceSize, const LstmParams& params, LstmIO& outputTensors);
};
//!
//! \brief Transpose a row-major height x width matrix of floats in place.
//!
//! \param data The data to transpose. Serves as both input and output and must hold height * width floats.
//! \param height The number of rows of the matrix before the transpose.
//! \param width The number of columns of the matrix before the transpose.
//!
//! \return True on success, false if an exception (for example a failed scratch allocation) was thrown.
//!
bool SampleCharRNNBase::transposeSubBuffers(void* data, int64_t height, int64_t width) noexcept
{
    try
    {
        ASSERT(data != nullptr);
        ASSERT(height > 0);
        ASSERT(width > 0);

        auto* const matrix = static_cast<float*>(data);

        // Scratch space sized in elements (not bytes): exactly one float per matrix entry.
        std::vector<float> transposed(static_cast<size_t>(height) * static_cast<size_t>(width));

        for (int64_t row = 0; row < height; ++row)
        {
            for (int64_t col = 0; col < width; ++col)
            {
                transposed[col * height + row] = matrix[row * width + col];
            }
        }

        // Write the transposed matrix back over the input.
        std::copy(transposed.begin(), transposed.end(), matrix);
    }
    catch (...)
    {
        // The function is noexcept; every failure is reported through the return value.
        return false;
    }
    return true;
}
//!
//! \brief Loads the weights and obtains the engine, either by building it or by loading it from disk.
//!
//! \details The weights file is always parsed first. When no engine file to load was requested,
//!          the network is created through the TensorRT network definition API and built into an
//!          engine; otherwise the engine is loaded from the requested file. If a save path was
//!          given, the resulting engine is written there.
//!
//! \return true if the engine was created successfully and false otherwise
//!
bool SampleCharRNNBase::build()
{
    mWeightMap = SampleCharRNNBase::loadWeights(mParams.weightFileName);

    bool const loadFromFile = !mParams.loadEngine.empty();
    if (loadFromFile)
    {
        sample::gLogInfo << "Loading engine from: " << mParams.loadEngine << std::endl;
        mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
            sample::loadEngine(mParams.loadEngine, -1, std::cerr), samplesCommon::InferDeleter());
    }
    else
    {
        auto builder
            = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
        if (!builder)
        {
            return false;
        }

        auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
        if (!network)
        {
            return false;
        }

        auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
        if (!config)
        {
            return false;
        }
        config->setFlag(BuilderFlag::kGPU_FALLBACK);

        // The builder profiles on this stream, so it has to stay alive until constructNetwork() returns.
        auto profileStream = samplesCommon::makeCudaStream();
        if (!profileStream)
        {
            return false;
        }
        config->setProfileStream(*profileStream);

        // constructNetwork() reports failure by leaving mEngine empty.
        constructNetwork(builder, network, config);
    }

    if (!mEngine)
    {
        return false;
    }

    if (mParams.saveEngine.empty())
    {
        return true;
    }

    sample::gLogInfo << "Saving engine to: " << mParams.saveEngine << std::endl;
    sample::saveEngine(*mEngine, mParams.saveEngine, std::cerr);
    return true;
}
//!
//! \brief Load requested weights from a formatted file into a map.
//!
//! \param file Path to weights file. File has to be the formatted dump from
//!        the dumpTFWts.py script. Otherwise, this function will not work as
//!        intended.
//!
//! \return A map containing the extracted weights. The memory the weights point to is owned by weightsMemory.
//!
//! \note  Weight V2 files are in a very simple space delimited format.
//!        <number of buffers>
//!        for each buffer: [name] [type] [shape] <data as binary blob>\n
//!        Note: type is the integer value of the DataType enum in NvInfer.h.
//!
//! \note  Only buffers whose name is in mParams.weightNames.names are read. Each name is erased from that
//!        set once read, and parsing stops as soon as the set is empty.
//!
std::map<std::string, nvinfer1::Weights> SampleCharRNNBase::loadWeights(const std::string file)
{
    std::map<std::string, nvinfer1::Weights> weightMap;

    std::ifstream input(file, std::ios_base::binary);
    ASSERT(input.is_open() && "Unable to load weight file.");

    int32_t count{0};
    input >> count;
    ASSERT(count > 0 && "Invalid weight map file.");

    auto& pendingNames = mParams.weightNames.names;
    for (; count > 0 && !pendingNames.empty(); --count)
    {
        nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0};

        // parse name and DataType
        std::string name;
        uint32_t type{0};
        input >> name >> std::dec >> type;
        wt.type = static_cast<nvinfer1::DataType>(type);

        // extract shape: everything between the next '(' and ')'
        std::string temp;
        std::string shape;
        std::getline(std::getline(input, temp, '('), shape, ')');

        // A truncated or malformed header would otherwise go unnoticed and produce a bogus element count.
        ASSERT(!input.fail() && "Malformed weight entry header.");

        // calculate count based on shape
        wt.count = 1;
        std::istringstream shapeStream(shape);
        while (std::getline(shapeStream, temp, ','))
        {
            wt.count *= std::stoul(temp);
        }
        size_t const numOfBytes = samplesCommon::getNbBytes(wt.type, wt.count);

        // skip reading of weights if name is not in the set of names requested for extraction
        if (pendingNames.find(name) == pendingNames.end())
        {
            // leading space + blob + trailing new-line
            input.seekg(static_cast<std::streamoff>(2 + numOfBytes), std::ios_base::cur);
            continue;
        }
        pendingNames.erase(name);

        // Read weight values
        input.seekg(static_cast<std::streamoff>(1), std::ios_base::cur); // skip space char

        // We do not really care about the setup of DataType here. Use char here to avoid additional conversion
        auto mem = new samplesCommon::TypedHostMemory<char, nvinfer1::DataType::kINT8>(numOfBytes);
        weightsMemory.emplace_back(mem);
        auto wtVals = mem->raw();
        input.read(wtVals, static_cast<std::streamsize>(numOfBytes));

        // A short read would leave part of the buffer unwritten; reject the file instead.
        ASSERT(input.gcount() == static_cast<std::streamsize>(numOfBytes) && "Truncated weight data.");

        input.seekg(static_cast<std::streamoff>(1), std::ios_base::cur); // skip new-line char

        wt.values = wtVals;
        weightMap[name] = wt;
    }

    input.close();
    sample::gLogInfo << "Done reading weights from file..." << std::endl;
    return weightMap;
}
//!
//! \brief Converts RNN weights from TensorFlow's format to TensorRT's format.
//!
//! \param orig Weights that are stored in TensorFlow's format. Must hold at least
//!        (dataSize + hiddenSize) * 4 * hiddenSize float values.
//! \param dataSize Number of rows of the input (W) part of the kernel; the remaining hiddenSize rows
//!        form the recurrent (R) part.
//!
//! \return Converted weights in TensorRT's format. The returned values are stored in weightsMemory.
//!
//! \note  TensorFlow weight parameters for BasicLSTMCell are formatted as:
//!        Each [WR][icfo] is hiddenSize sequential elements.
//!        CellN  Row 0: WiT, WcT, WfT, WoT
//!        CellN  Row 1: WiT, WcT, WfT, WoT
//!        ...
//!        CellN RowM-1: WiT, WcT, WfT, WoT
//!        CellN RowM+0: RiT, RcT, RfT, RoT
//!        CellN RowM+1: RiT, RcT, RfT, RoT
//!        ...
//!        CellNRow2M-1: RiT, RcT, RfT, RoT
//!
//!        TensorRT expects the format to be laid out in memory as:
//!        CellN: Wi, Wc, Wf, Wo, Ri, Rc, Rf, Ro
//!
nvinfer1::Weights SampleCharRNNBase::convertRNNWeights(nvinfer1::Weights orig, int dataSize)
{
    ASSERT(dataSize > 0);
    ASSERT(mParams.hiddenSize > 0);

    // All sizes are computed in 64 bits so the products cannot overflow int.
    int64_t const inputRows = dataSize;
    int64_t const recurrentRows = mParams.hiddenSize;
    int64_t const gateWidth = 4 * static_cast<int64_t>(mParams.hiddenSize);
    int64_t const count = (inputRows + recurrentRows) * gateWidth;

    // A weight that was missing from the file shows up here as a null pointer with a count of zero.
    ASSERT(orig.values != nullptr);
    ASSERT(orig.count >= count);

    auto mem = new samplesCommon::FloatMemory(count);
    weightsMemory.emplace_back(mem);
    auto ptr = mem->raw();

    float const* data = static_cast<float const*>(orig.values);
    std::copy(data, data + count, ptr);

    // W and R are stored back to back; each block is transposed on its own.
    ASSERT(transposeSubBuffers(ptr, inputRows, gateWidth));
    ASSERT(transposeSubBuffers(&ptr[inputRows * gateWidth], recurrentRows, gateWidth));

    return nvinfer1::Weights{orig.type, ptr, count};
}
//!
//! \brief Expands a TensorFlow BasicLSTMCell bias into the doubled layout used by this sample.
//!
//! \param input Biases that are stored in TensorFlow's format; must hold exactly 4 * hiddenSize values.
//!
//! \return Bias of twice the input length: the input values followed by the same number of zeros.
//!         The returned values are stored in weightsMemory.
//!
//! \note  TensorFlow bias parameters for BasicLSTMCell are formatted as:
//!        CellN: Bi, Bc, Bf, Bo
//!
//!        TensorRT expects the format to be:
//!        CellN: Wi, Wc, Wf, Wo, Ri, Rc, Rf, Ro
//!
//!        Since tensorflow already combines the two biases,
//!        we double the size and set the second half to zero.
nvinfer1::Weights SampleCharRNNBase::convertRNNBias(nvinfer1::Weights input)
{
    auto storage = new samplesCommon::FloatMemory(input.count * 2);
    weightsMemory.emplace_back(storage);
    float* const converted = storage->raw();

    float const* const source = static_cast<float const*>(input.values);
    int64_t const gateCount = 4 * mParams.hiddenSize;
    ASSERT(input.count == gateCount);

    // First half: the TensorFlow bias as is. std::copy returns the start of the second half.
    float* const secondHalf = std::copy(source, source + gateCount, converted);
    // Second half: zeros.
    std::fill_n(secondHalf, gateCount, 0.0F);

    return nvinfer1::Weights{input.type, converted, input.count * 2};
}
//!
//! \brief Adds one LSTM layer to the network as a TensorRT loop over the sequence.
//!
//! \param network The network the layers are added to.
//! \param inputTensors Sequence data plus initial hidden and cell state of this layer.
//! \param sequenceSize Scalar tensor used as the trip count of the loop.
//! \param params Weight, bias and maximum-sequence-size tensors of this layer.
//! \param outputTensors Receives the concatenated per-step hidden state and the last hidden/cell values.
//!
//! \return The loop output layer that concatenates the hidden state of every step.
//!
nvinfer1::ILayer* SampleCharRNNLoop::addLSTMCell(SampleUniquePtr<nvinfer1::INetworkDefinition>& network,
const LstmIO& inputTensors, nvinfer1::ITensor* sequenceSize, const LstmParams& params, LstmIO& outputTensors)
{
// One loop iteration per sequence element.
nvinfer1::ILoop* sequenceLoop = network->addLoop();
sequenceLoop->addTripLimit(*sequenceSize, nvinfer1::TripLimit::kCOUNT);
// Per-iteration slice of the input data, and the recurrent hidden/cell state seeded with the initial values.
nvinfer1::ITensor* input = sequenceLoop->addIterator(*inputTensors.data)->getOutput(0);
nvinfer1::IRecurrenceLayer* hidden = sequenceLoop->addRecurrence(*inputTensors.hidden);
nvinfer1::IRecurrenceLayer* cell = sequenceLoop->addRecurrence(*inputTensors.cell);
// input (as vector) x inputWeights^T
nvinfer1::ITensor* mmInput = network
->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kVECTOR,
*params.inputWeights, nvinfer1::MatrixOperation::kTRANSPOSE)
->getOutput(0);
// hidden state (as vector) x recurrentWeights^T
nvinfer1::ITensor* mmHidden = network
->addMatrixMultiply(*hidden->getOutput(0), nvinfer1::MatrixOperation::kVECTOR,
*params.recurrentWeights, nvinfer1::MatrixOperation::kTRANSPOSE)
->getOutput(0);
// Gate pre-activations: both products plus both biases.
nvinfer1::ITensor* mm
= network->addElementWise(*mmInput, *mmHidden, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
nvinfer1::ITensor* bias
= network->addElementWise(*params.inputBias, *params.recurrentBias, nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
nvinfer1::ITensor* gatesICFO
= network->addElementWise(*mm, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
// Extracts the hiddenSize elements belonging to gate number gateIndex (order: i, c, f, o).
const auto isolateGate = [&](nvinfer1::ITensor& gates, int gateIndex) -> nvinfer1::ITensor* {
nvinfer1::ISliceLayer* slice = network->addSlice(gates, nvinfer1::Dims{1, {gateIndex * mParams.hiddenSize}},
nvinfer1::Dims{1, {mParams.hiddenSize}}, nvinfer1::Dims{1, {1}});
return addReshape(network, *slice->getOutput(0), nvinfer1::Dims{1, {mParams.hiddenSize}});
};
// Gate activations: sigmoid for i, f and o; tanh for c.
nvinfer1::ITensor* i
= network->addActivation(*isolateGate(*gatesICFO, 0), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
nvinfer1::ITensor* c
= network->addActivation(*isolateGate(*gatesICFO, 1), nvinfer1::ActivationType::kTANH)->getOutput(0);
nvinfer1::ITensor* f
= network->addActivation(*isolateGate(*gatesICFO, 2), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
nvinfer1::ITensor* o
= network->addActivation(*isolateGate(*gatesICFO, 3), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
// New cell state: C = f * previous cell + i * c
nvinfer1::ITensor* C
= network
->addElementWise(*network->addElementWise(*f, *cell->getOutput(0), nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0),
*network->addElementWise(*i, *c, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0),
nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
// New hidden state: H = o * tanh(C)
nvinfer1::ITensor* H
= network
->addElementWise(*o, *network->addActivation(*C, nvinfer1::ActivationType::kTANH)->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
// Recurrent backedge input for hidden and cell.
cell->setInput(1, *C);
hidden->setInput(1, *H);
// Concatenate H over all iterations; maxSequenceSize is passed as the layer's second input.
nvinfer1::ILoopOutputLayer* outputLayer = sequenceLoop->addLoopOutput(*H, nvinfer1::LoopOutput::kCONCATENATE);
outputLayer->setInput(1, *params.maxSequenceSize);
// Last values of the hidden and cell recurrences, exposed as the layer's state outputs.
nvinfer1::ITensor* hiddenOut
= sequenceLoop->addLoopOutput(*hidden->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0);
nvinfer1::ITensor* cellOut
= sequenceLoop->addLoopOutput(*cell->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0);
outputTensors = LstmIO{outputLayer->getOutput(0), hiddenOut, cellOut};
return outputLayer;
}
//!
//! \brief Adds a shuffle layer that reshapes a tensor to the given dimensions.
//!
//! \param network The network the shuffle layer is added to.
//! \param tensor The tensor to reshape.
//! \param dims The dimensions the tensor is reshaped to.
//!
//! \return The output tensor of the new shuffle layer.
//!
nvinfer1::ITensor* SampleCharRNNBase::addReshape(
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, nvinfer1::ITensor& tensor, nvinfer1::Dims dims)
{
    auto* const reshapeLayer = network->addShuffle(tensor);
    reshapeLayer->setReshapeDimensions(dims);
    nvinfer1::ITensor* const reshaped = reshapeLayer->getOutput(0);
    return reshaped;
}
//!
//! \brief Adds the network inputs and the stacked LSTM layers, and marks the hidden/cell state outputs.
//!
//! \param network The network the inputs and layers are added to.
//!
//! \return The loop output layer of the last LSTM layer (nullptr if layerCount is 0).
//!
nvinfer1::ILayer* SampleCharRNNLoop::addLSTMLayers(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
nvinfer1::ILayer* dataOut{nullptr};
// Network inputs: sequence data, hidden and cell state of all layers, and the scalar sequence length.
nvinfer1::ITensor* data = network->addInput(mParams.bindingNames.INPUT_BLOB_NAME, nvinfer1::DataType::kFLOAT,
nvinfer1::Dims2(mParams.seqSize, mParams.dataSize));
ASSERT(data != nullptr);
nvinfer1::ITensor* hiddenLayers = network->addInput(mParams.bindingNames.HIDDEN_IN_BLOB_NAME,
nvinfer1::DataType::kFLOAT, nvinfer1::Dims2(mParams.layerCount, mParams.hiddenSize));
ASSERT(hiddenLayers != nullptr);
nvinfer1::ITensor* cellLayers = network->addInput(mParams.bindingNames.CELL_IN_BLOB_NAME,
nvinfer1::DataType::kFLOAT, nvinfer1::Dims2(mParams.layerCount, mParams.hiddenSize));
ASSERT(cellLayers != nullptr);
nvinfer1::ITensor* sequenceSize
= network->addInput(mParams.bindingNames.SEQ_LEN_IN_BLOB_NAME, nvinfer1::DataType::kINT32, nvinfer1::Dims{});
ASSERT(sequenceSize != nullptr);
// convert tensorflow weight format to trt weight format
std::array<nvinfer1::Weights, 2> rnnw{
SampleCharRNNBase::convertRNNWeights(mWeightMap[mParams.weightNames.RNNW_L0_NAME], mParams.dataSize),
SampleCharRNNBase::convertRNNWeights(mWeightMap[mParams.weightNames.RNNW_L1_NAME], mParams.hiddenSize)};
std::array<nvinfer1::Weights, 2> rnnb{
SampleCharRNNBase::convertRNNBias(mWeightMap[mParams.weightNames.RNNB_L0_NAME]),
SampleCharRNNBase::convertRNNBias(mWeightMap[mParams.weightNames.RNNB_L1_NAME])};
// Record the transformed weights in the weight map under their own names.
// NOTE(review): the backing memory itself is owned by weightsMemory (see the convert functions).
mWeightMap["rnnwL0"] = rnnw[0];
mWeightMap["rnnwL1"] = rnnw[1];
mWeightMap["rnnbL0"] = rnnb[0];
mWeightMap["rnnbL1"] = rnnb[1];
// Scalar constant backed by mParams.seqSize.
nvinfer1::ITensor* maxSequenceSize
= network->addConstant(nvinfer1::Dims{}, Weights{DataType::kINT32, &mParams.seqSize, 1})->getOutput(0);
// INDICES provides the per-layer index constants below, so it bounds the number of layers.
// NOTE(review): rnnw and rnnb hold only two entries, yet the loop below indexes them with i < layerCount.
ASSERT(static_cast<size_t>(mParams.layerCount) <= INDICES.size());
LstmIO lstmNext{data, nullptr, nullptr};
std::vector<nvinfer1::ITensor*> hiddenOutputs;
std::vector<nvinfer1::ITensor*> cellOutputs;
// Shapes of the input weights of layer 0, of all other weight matrices, and of one bias vector.
nvinfer1::Dims2 dimWL0(4 * mParams.hiddenSize, mParams.dataSize);
nvinfer1::Dims2 dimR(4 * mParams.hiddenSize, mParams.hiddenSize);
nvinfer1::Dims dimB{1, {4 * mParams.hiddenSize}};
nvinfer1::Dims dim0{1, {0}};
// Returns the sub-range of `weights` that starts volume(start) elements in and holds volume(size) elements.
auto extractWeights = [](nvinfer1::Weights weights, Dims start, Dims size) -> nvinfer1::Weights {
const char* data = static_cast<const char*>(weights.values);
int64_t shift = samplesCommon::volume(start);
const int bufferSize = samplesCommon::getNbBytes(weights.type, shift);
int64_t count = samplesCommon::volume(size);
ASSERT(shift + count <= weights.count);
return nvinfer1::Weights{weights.type, data + bufferSize, count};
};
for (int i = 0; i < mParams.layerCount; ++i)
{
// Only layer 0 consumes the embedding; later layers consume the previous layer's hidden output.
nvinfer1::Dims dimW = i == 0 ? dimWL0 : dimR;
// Select row i of the hidden/cell state inputs as this layer's initial state.
nvinfer1::ITensor* index
= network->addConstant(nvinfer1::Dims{}, Weights{DataType::kINT32, &INDICES[i], 1})->getOutput(0);
nvinfer1::ITensor* hidden = network->addGather(*hiddenLayers, *index, 0)->getOutput(0);
nvinfer1::ITensor* cell = network->addGather(*cellLayers, *index, 0)->getOutput(0);
// The converted buffers hold the input part first, followed by the recurrent part.
nvinfer1::ITensor* weightIn = network->addConstant(dimW, extractWeights(rnnw[i], dim0, dimW))->getOutput(0);
nvinfer1::ITensor* weightRec = network->addConstant(dimR, extractWeights(rnnw[i], dimW, dimR))->getOutput(0);
nvinfer1::ITensor* biasIn = network->addConstant(dimB, extractWeights(rnnb[i], dim0, dimB))->getOutput(0);
nvinfer1::ITensor* biasRec = network->addConstant(dimB, extractWeights(rnnb[i], dimB, dimB))->getOutput(0);
LstmIO lstmInput{lstmNext.data, hidden, cell};
LstmParams params{weightIn, weightRec, biasIn, biasRec, maxSequenceSize};
Dims2 dims{1, mParams.hiddenSize};
// addLSTMCell() overwrites lstmNext with this layer's outputs, which feed the next iteration.
dataOut = addLSTMCell(network, lstmInput, sequenceSize, params, lstmNext);
hiddenOutputs.push_back(addReshape(network, *lstmNext.hidden, dims));
cellOutputs.push_back(addReshape(network, *lstmNext.cell, dims));
}
// Stacks the per-layer state tensors along axis 0.
auto addConcatenation = [&network](std::vector<nvinfer1::ITensor*> tensors) -> nvinfer1::ITensor* {
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(tensors.data(), tensors.size());
concat->setAxis(0);
return concat->getOutput(0);
};
nvinfer1::ITensor* hiddenNext = addConcatenation(hiddenOutputs);
hiddenNext->setName(mParams.bindingNames.HIDDEN_OUT_BLOB_NAME);
network->markOutput(*hiddenNext);
nvinfer1::ITensor* cellNext = addConcatenation(cellOutputs);
cellNext->setName(mParams.bindingNames.CELL_OUT_BLOB_NAME);
network->markOutput(*cellNext);
return dataOut;
}
//!
//! \brief Create full model using the TensorRT network definition API and build the engine.
//!
//! \details Adds the LSTM layers, the fully connected part (matrix multiply plus bias) and a TopK layer,
//!          builds a serialized plan and deserializes it into mEngine. On failure mEngine is left
//!          unchanged, which is how build() detects the error.
//!
//! \param builder The builder used to create the serialized network.
//! \param network The (empty) network definition that is populated here.
//! \param config The builder configuration used for the build and for the timing cache.
//!
//! \note The fully connected weights stored in mWeightMap are transposed in place.
//!
void SampleCharRNNBase::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config)
{
    // Add the network inputs and the LSTM layers
    auto rnn = addLSTMLayers(network);
    ASSERT(rnn != nullptr);

    // Transpose FC weights since TensorFlow's weights are transposed when compared to TensorRT.
    // nvinfer1::Weights exposes its values as const; the buffer itself is owned by this class and is writable.
    ASSERT(transposeSubBuffers(const_cast<void*>(mWeightMap[mParams.weightNames.FCW_NAME].values),
        mParams.hiddenSize, mParams.vocabSize));

    // add Constant layers for fully connected weights
    auto fcwts = network->addConstant(
        nvinfer1::Dims2(mParams.vocabSize, mParams.hiddenSize), mWeightMap[mParams.weightNames.FCW_NAME]);
    ASSERT(fcwts != nullptr);

    // Add matrix multiplication layer for multiplying rnn output with FC weights
    auto matrixMultLayer = network->addMatrixMultiply(
        *fcwts->getOutput(0), MatrixOperation::kNONE, *rnn->getOutput(0), MatrixOperation::kTRANSPOSE);
    ASSERT(matrixMultLayer != nullptr);
    matrixMultLayer->getOutput(0)->setName("Matrix Multiplicaton output");

    // Add elementwise layer for adding bias
    auto fcbias = network->addConstant(nvinfer1::Dims2(mParams.vocabSize, 1), mWeightMap[mParams.weightNames.FCB_NAME]);
    ASSERT(fcbias != nullptr);
    auto addBiasLayer = network->addElementWise(
        *matrixMultLayer->getOutput(0), *fcbias->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
    ASSERT(addBiasLayer != nullptr);
    addBiasLayer->getOutput(0)->setName("Add Bias output");

    // Add TopK layer to determine which character has highest probability.
    int reduceAxis = 0x1; // reduce across vocab axis
    auto pred = network->addTopK(*addBiasLayer->getOutput(0), nvinfer1::TopKOperation::kMAX, 1, reduceAxis);
    ASSERT(pred != nullptr);
    pred->getOutput(1)->setName(mParams.bindingNames.OUTPUT_BLOB_NAME);

    // Mark the outputs for the network (output 1 of TopK holds the indices)
    network->markOutput(*pred->getOutput(1));
    pred->getOutput(1)->setType(nvinfer1::DataType::kINT32);

    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
    if (!mParams.timingCacheFile.empty())
    {
        timingCache
            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
    }

    sample::gLogInfo << "Done constructing network..." << std::endl;

    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        return;
    }

    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
    {
        samplesCommon::updateTimingCacheFile(
            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
    }

    mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
    if (!mRuntime)
    {
        return;
    }

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
}
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It
//! allocates the buffer, sets inputs, executes the engine, and verifies the output.
//!
bool SampleCharRNNBase::infer()
{
// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine, 0);
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
return false;
}
// Select a random seed string.
srand(unsigned(time(nullptr)));
int sentenceIndex = rand() % mParams.inputSentences.size();
std::string inputSentence = mParams.inputSentences[sentenceIndex];
std::string expected = mParams.outputSentences[sentenceIndex];
std::string genstr;
sample::gLogInfo << "RNN warmup sentence: " << inputSentence << std::endl;
sample::gLogInfo << "Expected output: " << expected << std::endl;
// create stream for trt execution
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// Set sequence lengths to maximum
int* sequenceLengthIn
= reinterpret_cast<int32_t*>(buffers.getHostBuffer(mParams.bindingNames.SEQ_LEN_IN_BLOB_NAME));
auto sequenceLengthTensorSize = buffers.size(mParams.bindingNames.SEQ_LEN_IN_BLOB_NAME);
std::fill_n(sequenceLengthIn, sequenceLengthTensorSize / sizeof(mParams.seqSize), mParams.seqSize);
// Initialize hiddenIn and cellIn tensors to zero before seeding
void* hiddenIn = buffers.getHostBuffer(mParams.bindingNames.HIDDEN_IN_BLOB_NAME);
auto hiddenTensorSize = buffers.size(mParams.bindingNames.HIDDEN_IN_BLOB_NAME);
void* cellIn = buffers.getHostBuffer(mParams.bindingNames.CELL_IN_BLOB_NAME);
auto cellTensorSize = buffers.size(mParams.bindingNames.CELL_IN_BLOB_NAME);
std::memset(hiddenIn, 0, hiddenTensorSize);
std::memset(cellIn, 0, cellTensorSize);
// Seed the RNN with the input sentence.
for (auto& a : inputSentence)
{
SampleCharRNNBase::copyEmbeddingToInput(buffers, a);
if (!SampleCharRNNBase::stepOnce(buffers, context, stream))
{
return false;
}
SampleCharRNNBase::copyRNNOutputsToInputs(buffers);
genstr.push_back(a);
}
// Extract first predicted character
uint32_t predIdx = *reinterpret_cast<uint32_t*>(buffers.getHostBuffer(mParams.bindingNames.OUTPUT_BLOB_NAME));
genstr.push_back(mParams.charMaps.idToChar.at(predIdx));
// Generate predicted sequence of characters
for (size_t x = 0, y = expected.size() - 1; x < y; x++)
{
SampleCharRNNBase::copyEmbeddingToInput(buffers, *genstr.rbegin());
if (!SampleCharRNNBase::stepOnce(buffers, context, stream))
{
return false;
}
SampleCharRNNBase::copyRNNOutputsToInputs(buffers);
predIdx = *reinterpret_cast<uint32_t*>(buffers.getHostBuffer(mParams.bindingNames.OUTPUT_BLOB_NAME));
genstr.push_back(mParams.charMaps.idToChar.at(predIdx));
}
sample::gLogInfo << "Received: " << genstr.substr(inputSentence.size()) << std::endl;
// release the stream
CHECK(cudaStreamDestroy(stream));
return genstr == (inputSentence + expected);
}
//!
//! \brief Copies the embedding of a character into the host buffer of the data input.
//!
//! \param buffers Buffer manager that owns the host input buffer.
//! \param c Character to look up; charToID.at() throws std::out_of_range if it is not in the map.
//!
void SampleCharRNNBase::copyEmbeddingToInput(samplesCommon::BufferManager& buffers, char const& c)
{
    nvinfer1::Weights const embedding = mWeightMap[mParams.weightNames.EMBED_NAME];
    auto* const destination = static_cast<float*>(buffers.getHostBuffer(mParams.bindingNames.INPUT_BLOB_NAME));
    auto const row = mParams.charMaps.charToID.at(c);
    auto const nbBytes = buffers.size(mParams.bindingNames.INPUT_BLOB_NAME);

    // The embedding table is indexed by character id, with dataSize floats per row.
    float const* const source = static_cast<float const*>(embedding.values) + row * mParams.dataSize;
    std::memcpy(destination, source, nbBytes);
}
//!
//! \brief Perform one time step of inference with the TensorRT execution context
//!
//! \param buffers Buffer manager holding the host and device buffers of all I/O tensors.
//! \param context Execution context used to enqueue the inference.
//! \param stream CUDA stream on which the copies and the inference are enqueued.
//!
//! \return true once the outputs are available in the host buffers, false if the inference
//!         could not be enqueued.
//!
bool SampleCharRNNBase::stepOnce(
    samplesCommon::BufferManager& buffers, SampleUniquePtr<nvinfer1::IExecutionContext>& context, cudaStream_t& stream)
{
    // Asynchronously copy data from host input buffers to device input buffers
    buffers.copyInputToDeviceAsync(stream);

    // Bind every I/O tensor of the engine to its device buffer
    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
    {
        auto const name = mEngine->getIOTensorName(i);
        context->setTensorAddress(name, buffers.getDeviceBuffer(name));
    }

    // Asynchronously enqueue the inference work. Report a failure to the caller, which already
    // checks the return value, instead of terminating the process here.
    if (!context->enqueueV3(stream))
    {
        sample::gLogError << "Failed to enqueue inference." << std::endl;
        return false;
    }

    // Asynchronously copy data from device output buffers to host output buffers
    buffers.copyOutputToHostAsync(stream);

    // Wait until the copies and the inference have finished
    CHECK(cudaStreamSynchronize(stream));
    return true;
}
//!
//! \brief Feeds the hidden and cell state outputs of the last step back into the matching input buffers.
//!
//! \param buffers Buffer manager holding the host buffers of the state tensors.
//!
void SampleCharRNNBase::copyRNNOutputsToInputs(samplesCommon::BufferManager& buffers)
{
    auto const& names = mParams.bindingNames;

    // Copies the host buffer of an output tensor over the host buffer of its input counterpart.
    // The number of bytes copied is the size of the input tensor.
    auto const feedBack = [&buffers](char const* inputName, char const* outputName) {
        void* const destination = buffers.getHostBuffer(inputName);
        void* const source = buffers.getHostBuffer(outputName);
        std::memcpy(destination, source, buffers.size(inputName));
    };

    // Copy Ct/Ht to the Ct-1/Ht-1 slots.
    feedBack(names.HIDDEN_IN_BLOB_NAME, names.HIDDEN_OUT_BLOB_NAME);
    feedBack(names.CELL_IN_BLOB_NAME, names.CELL_OUT_BLOB_NAME);
}
//!
//! \brief Used to clean up any state created in the sample class
//!
//! \return Always true; this implementation performs no explicit cleanup.
//!
bool SampleCharRNNBase::teardown()
{
return true;
}
//!
//! \brief Builds the parameters of the sample from the parsed command line arguments.
//!
//! \param args Parsed command line arguments (data directories, engine paths, timing cache file).
//!
//! \return Fully initialized parameters, including the fixed model sizes and the test sentences.
//!
SampleCharRNNParams initializeSampleParams(const samplesCommon::Args& args)
{
    SampleCharRNNParams params;

    // Data directories given by the user replace the defaults.
    if (!args.dataDirs.empty())
    {
        params.dataDirs = args.dataDirs;
    }
    else
    {
        params.dataDirs.emplace_back("data/char-rnn/");
        params.dataDirs.emplace_back("data/samples/char-rnn/");
    }

    // Options taken over from the command line
    params.saveEngine = args.saveEngine;
    params.loadEngine = args.loadEngine;
    params.timingCacheFile = args.timingCacheFile;

    // Fixed sizes of the trained model
    params.batchSize = 1;
    params.layerCount = 2;
    params.hiddenSize = 512;
    params.seqSize = 1;
    params.dataSize = params.hiddenSize;
    params.vocabSize = 65;
    params.outputSize = 1;

    params.weightFileName = samplesCommon::locateFile("char-rnn.wts", params.dataDirs);

    // Input strings; the expected output for each one is at the same index in outputSentences.
    params.inputSentences = {
        "ROMEO",
        "JUL",
        "The K",
        "That tho",
        "KING",
        "beauty of",
        "birth of K",
        "Hi",
        "JACK",
        "interestingly, it was J",
    };
    params.outputSentences = {
        ":\nThe sense to",
        "IET:\nWhat shall I shall be",
        "ing Richard shall be the strange",
        "u shalt be the",
        " HENRY VI:\nWhat",
        " the son,",
        "ing Richard's son",
        "ng of York,\nThat thou hast so the",
        "INGHAM:\nWhat shall I",
        "uliet",
    };

    return params;
}
//!
//! \brief Prints the help information for running this sample
//!
//! \details Writes the usage line and one description per command line option to std::cout
//!          and flushes the stream.
//!
void printHelpInfo()
{
    std::cout << "Usage: ./sample_char_rnn [-h or --help] [-d or --datadir=<path to data directory>]\n";
    std::cout << "--help Display help information\n";
    std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
                 "data/samples/char-rnn/ and data/char-rnn/"
              << '\n';
    // When an engine is loaded from file the network is not built.
    std::cout << "--loadEngine Specify path from which to load the engine. When this option is provided, engine "
                 "building is skipped."
              << '\n';
    std::cout << "--saveEngine Specify path at which to save the engine." << '\n';
    std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be "
              << "created." << std::endl;
}
//!
//! \brief Runs the char-rnn model in TensorRT with a set of expected input and output strings.
//!
int main(int argc, char** argv)
{
sample::setReportableSeverity(sample::Logger::Severity::kVERBOSE);
samplesCommon::Args args;
bool argsOK = samplesCommon::parseArgs(args, argc, argv);
if (!argsOK)
{
sample::gLogError << "Invalid arguments" << std::endl;
printHelpInfo();
return EXIT_FAILURE;
}
if (args.help)
{
printHelpInfo();
return EXIT_SUCCESS;
}
auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
sample::gLogger.reportTestStart(sampleTest);
SampleCharRNNParams params = initializeSampleParams(args);
std::unique_ptr<SampleCharRNNBase> sample;
sample.reset(new SampleCharRNNLoop(params));
sample::gLogInfo << "Building and running a GPU inference engine for Char RNN model..." << std::endl;
if (!sample->build())
{
return sample::gLogger.reportFail(sampleTest);
}
if (!sample->infer())
{
return sample::gLogger.reportFail(sampleTest);
}
if (!sample->teardown())
{
return sample::gLogger.reportFail(sampleTest);
}
return sample::gLogger.reportPass(sampleTest);
}