G0-VLA / g0plus_dockerfile /docker-assets /data /TensorRT-10.13.0.35 /samples /common /sampleOptions.cpp
| /* | |
| * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| * SPDX-License-Identifier: Apache-2.0 | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| using namespace nvinfer1; | |
| namespace sample | |
| { | |
| namespace | |
| { | |
//! Base-2 unit suffixes accepted on numeric options, keyed by the upper-case suffix character.
//! Each entry holds {multiplier in bytes, human-readable unit name}.
static const std::map<char, std::pair<int64_t, std::string>> kUNIT_MULTIPLIERS = {
    {'B', {int64_t{1}, "Bytes"}},
    {'K', {int64_t{1} << 10, "Kibibytes"}},
    {'M', {int64_t{1} << 20, "Mebibytes"}},
    {'G', {int64_t{1} << 30, "Gibibytes"}},
};
//! Appends \p defaultUnit to \p option when the option text ends in a decimal digit,
//! i.e. when no unit suffix was given. Otherwise the text is returned unchanged.
//!
//! \param option Raw option text; only its last character is inspected.
//! \param defaultUnit Suffix character to append when none is present.
//! \return The option text, guaranteed not to end in a digit.
//! \throw std::out_of_range if \p option is empty (raised by std::string::at).
std::string addDefaultUnitSuffixIfNotSpecified(std::string const& option, char defaultUnit)
{
    // <cctype> classifiers are undefined for negative arguments, so a non-ASCII trailing
    // byte must be widened through unsigned char before it is tested.
    auto const lastChar = static_cast<unsigned char>(option.at(option.size() - 1));
    if (std::isdigit(lastChar))
    {
        return option + defaultUnit;
    }
    return option;
}
| // Returns "B (Bytes), K (Kilobytes), ..." | |
| std::string getAvailableUnitSuffixes() | |
| { | |
| std::ostringstream ss; | |
| for (auto it = kUNIT_MULTIPLIERS.begin(); it != kUNIT_MULTIPLIERS.end(); ++it) | |
| { | |
| if (it != kUNIT_MULTIPLIERS.begin()) | |
| { | |
| ss << ", "; | |
| } | |
| ss << it->first << " (" << it->second.second << ")"; | |
| } | |
| return ss.str(); | |
| } | |
| // Numeric trtexec arguments can have unit specifiers in similar to polygraphy. | |
| // E.g. --weightStreamingBudget=20M would be 20 Mebibytes (base 2). | |
| int64_t getUnitMultiplier(std::string const& option) | |
| { | |
| char lastChar = option.at(option.size() - 1); | |
| if (!std::isdigit(lastChar)) | |
| { | |
| char unit = std::toupper(lastChar); | |
| auto found = kUNIT_MULTIPLIERS.find(unit); | |
| if (found == kUNIT_MULTIPLIERS.end()) | |
| { | |
| std::ostringstream ss; | |
| ss << "Error parsing \"" << option << "\": invalid unit specifier '" << unit | |
| << "'. Valid base-2 unit suffixes include: "; | |
| ss << getAvailableUnitSuffixes() << "."; | |
| throw std::invalid_argument(ss.str()); | |
| } | |
| return found->second.first; | |
| } | |
| // Return bytes by default | |
| return kUNIT_MULTIPLIERS.at('B').first; | |
| } | |
//! Generic conversion: brace-initializes a T directly from the option text.
//! Types that need real parsing provide explicit specializations.
template <typename T>
T stringToValue(const std::string& option)
{
    T converted{option};
    return converted;
}
| template <> | |
| int32_t stringToValue<int32_t>(const std::string& option) | |
| { | |
| return std::stoi(option); | |
| } | |
| template <> | |
| int64_t stringToValue<int64_t>(const std::string& option) | |
| { | |
| return std::stoi(option); | |
| } | |
| template <> | |
| size_t stringToValue<size_t>(const std::string& option) | |
| { | |
| return std::stoi(option) * getUnitMultiplier(option); | |
| } | |
| template <> | |
| float stringToValue<float>(const std::string& option) | |
| { | |
| return std::stof(option); | |
| } | |
| template <> | |
| double stringToValue<double>(const std::string& option) | |
| { | |
| return std::stod(option) * getUnitMultiplier(option); | |
| } | |
| template <> | |
| bool stringToValue<bool>(const std::string& option) | |
| { | |
| return true; | |
| } | |
| template <> | |
| std::vector<int64_t> stringToValue<std::vector<int64_t>>(const std::string& option) | |
| { | |
| std::vector<int64_t> shape; | |
| if (option == "scalar") | |
| { | |
| return shape; | |
| } | |
| std::vector<std::string> dimsStrings = splitToStringVec(option, 'x'); | |
| for (const auto& d : dimsStrings) | |
| { | |
| shape.push_back(stringToValue<int64_t>(d)); | |
| } | |
| return shape; | |
| } | |
| template <> | |
| nvinfer1::DataType stringToValue<nvinfer1::DataType>(const std::string& option) | |
| { | |
| const std::unordered_map<std::string, nvinfer1::DataType> strToDT{{"fp32", nvinfer1::DataType::kFLOAT}, | |
| {"fp16", nvinfer1::DataType::kHALF}, {"bf16", nvinfer1::DataType::kBF16}, {"int8", nvinfer1::DataType::kINT8}, | |
| {"fp8", nvinfer1::DataType::kFP8}, {"int32", nvinfer1::DataType::kINT32}, {"int64", nvinfer1::DataType::kINT64}, | |
| {"bool", nvinfer1::DataType::kBOOL}, {"uint8", nvinfer1::DataType::kUINT8}, | |
| {"int4", nvinfer1::DataType::kINT4}}; | |
| const auto& dt = strToDT.find(option); | |
| if (dt == strToDT.end()) | |
| { | |
| throw std::invalid_argument("Invalid DataType " + option); | |
| } | |
| return dt->second; | |
| } | |
| template <> | |
| nvinfer1::DeviceType stringToValue<nvinfer1::DeviceType>(std::string const& option) | |
| { | |
| std::unordered_map<std::string, nvinfer1::DeviceType> const strToDevice = { | |
| {"GPU", nvinfer1::DeviceType::kGPU}, | |
| {"DLA", nvinfer1::DeviceType::kDLA}, | |
| }; | |
| auto const& device = strToDevice.find(option); | |
| if (device == strToDevice.end()) | |
| { | |
| throw std::invalid_argument("Invalid Device Type " + option); | |
| } | |
| return device->second; | |
| } | |
| template <> | |
| nvinfer1::TensorFormats stringToValue<nvinfer1::TensorFormats>(const std::string& option) | |
| { | |
| std::vector<std::string> optionStrings = splitToStringVec(option, '+'); | |
| const std::unordered_map<std::string, nvinfer1::TensorFormat> strToFmt{{"chw", nvinfer1::TensorFormat::kLINEAR}, | |
| {"chw2", nvinfer1::TensorFormat::kCHW2}, {"chw4", nvinfer1::TensorFormat::kCHW4}, | |
| {"hwc8", nvinfer1::TensorFormat::kHWC8}, {"chw16", nvinfer1::TensorFormat::kCHW16}, | |
| {"chw32", nvinfer1::TensorFormat::kCHW32}, {"dhwc8", nvinfer1::TensorFormat::kDHWC8}, | |
| {"cdhw32", nvinfer1::TensorFormat::kCDHW32}, {"hwc", nvinfer1::TensorFormat::kHWC}, | |
| {"dhwc", nvinfer1::TensorFormat::kDHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR}, | |
| {"hwc16", nvinfer1::TensorFormat::kHWC16}, {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}}; | |
| nvinfer1::TensorFormats formats{}; | |
| for (auto f : optionStrings) | |
| { | |
| const auto& tf = strToFmt.find(f); | |
| if (tf == strToFmt.end()) | |
| { | |
| throw std::invalid_argument(std::string("Invalid TensorFormat ") + f); | |
| } | |
| formats |= 1U << static_cast<int32_t>(tf->second); | |
| } | |
| return formats; | |
| } | |
| template <> | |
| IOFormat stringToValue<IOFormat>(const std::string& option) | |
| { | |
| IOFormat ioFormat{}; | |
| const size_t colon = option.find(':'); | |
| if (colon == std::string::npos) | |
| { | |
| throw std::invalid_argument(std::string("Invalid IOFormat ") + option); | |
| } | |
| ioFormat.first = stringToValue<nvinfer1::DataType>(option.substr(0, colon)); | |
| ioFormat.second = stringToValue<nvinfer1::TensorFormats>(option.substr(colon + 1)); | |
| return ioFormat; | |
| } | |
| template <> | |
| SparsityFlag stringToValue<SparsityFlag>(std::string const& option) | |
| { | |
| std::unordered_map<std::string, SparsityFlag> const table{ | |
| {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, | |
| { | |
| "force", SparsityFlag::kFORCE | |
| } | |
| }; | |
| auto search = table.find(option); | |
| if (search == table.end()) | |
| { | |
| throw std::invalid_argument(std::string("Unknown sparsity mode: ") + option); | |
| } | |
| if (search->second == SparsityFlag::kFORCE) | |
| { | |
| sample::gLogWarning << "--sparsity=force has been deprecated. " | |
| << "Please use <polygraphy surgeon prune> to rewrite the weights to a sparsity pattern " | |
| << "and then run with --sparsity=enable" << std::endl; | |
| } | |
| return search->second; | |
| } | |
| template <> | |
| WeightStreamingBudget stringToValue<WeightStreamingBudget>(std::string const& option) | |
| { | |
| WeightStreamingBudget budget; | |
| if (option.find('%') != std::string::npos) | |
| { | |
| double percent = std::stod(option); | |
| if (!(percent >= 0 && percent <= 100.0)) | |
| { | |
| std::ostringstream err; | |
| err << "The weight streaming percent must be between 0 and 100."; | |
| throw std::invalid_argument(err.str()); | |
| } | |
| budget.percent = percent; | |
| } | |
| else | |
| { | |
| double bytes = stringToValue<double>(option); | |
| if (!(bytes == WeightStreamingBudget::kAUTOMATIC || bytes == WeightStreamingBudget::kDISABLE || bytes >= 0)) | |
| { | |
| std::ostringstream err; | |
| err << "The weight streaming budget must be " << WeightStreamingBudget::kDISABLE << ", " | |
| << WeightStreamingBudget::kAUTOMATIC << ", or at least 0."; | |
| throw std::invalid_argument(err.str()); | |
| } | |
| budget.bytes = static_cast<int64_t>(bytes); | |
| } | |
| return budget; | |
| } | |
| template <> | |
| samplesSafeCommon::SafetyPluginLibraryArgument stringToValue<samplesSafeCommon::SafetyPluginLibraryArgument>( | |
| std::string const& option) | |
| { | |
| samplesSafeCommon::SafetyPluginLibraryArgument argument; | |
| auto status = parseSafetyPluginArgument(option, argument); | |
| if (!status) | |
| { | |
| throw std::invalid_argument(std::string("Invalid Safety plugin library option: " + option)); | |
| } | |
| return argument; | |
| } | |
| template <typename T> | |
| std::pair<std::string, T> splitNameAndValue(const std::string& s) | |
| { | |
| std::string tensorName; | |
| std::string valueString; | |
| // Support 'inputName':Path format for --loadInputs flag when dealing with Windows paths. | |
| // i.e. 'inputName':c:\inputData | |
| std::vector<std::string> quoteNameRange{splitToStringVec(s, '\'')}; | |
| // splitToStringVec returns the entire string when delimiter is not found, so it's size is always at least 1 | |
| if (quoteNameRange.size() != 1) | |
| { | |
| if (quoteNameRange.size() != 3) | |
| { | |
| std::string errorMsg = std::string("Found invalid number of \'s when parsing ") + s + | |
| std::string(". Expected: 2, received: ") + std::to_string(quoteNameRange.size() -1) + | |
| ". Please ensure that a singular comma is used within each comma-separated key-value pair for options like --inputIOFormats, --optShapes, --optShapesCalib, --layerPrecisions, etc."; | |
| throw std::invalid_argument(errorMsg); | |
| } | |
| // Everything before the second "'" is the name. | |
| tensorName = quoteNameRange[0] + quoteNameRange[1]; | |
| // Path is the last string - ignoring leading ":" so slice it with [1:] | |
| valueString = quoteNameRange[2].substr(1); | |
| return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString)); | |
| } | |
| // Split on the last : | |
| std::vector<std::string> nameRange{splitToStringVec(s, ':')}; | |
| // Everything before the last : is the name | |
| tensorName = nameRange[0]; | |
| for (size_t i = 1; i < nameRange.size() - 1; i++) | |
| { | |
| tensorName += ":" + nameRange[i]; | |
| } | |
| // Value is the string element after the last : | |
| valueString = nameRange[nameRange.size() - 1]; | |
| return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString)); | |
| } | |
| template <typename T> | |
| void splitInsertKeyValue(const std::vector<std::string>& kvList, T& map) | |
| { | |
| for (const auto& kv : kvList) | |
| { | |
| map.insert(splitNameAndValue<typename T::mapped_type>(kv)); | |
| } | |
| } | |
//! Returns "Enabled" for true and "Disabled" for false.
const char* boolToEnabled(bool enable)
{
    if (enable)
    {
        return "Enabled";
    }
    return "Disabled";
}
//! Streams every element of \p list into one string, placing \p sep between consecutive
//! elements (the equivalent of Python's sep.join(list)). An empty list yields "".
template <typename T>
std::string joinValuesToString(std::vector<T> const& list, std::string const& sep)
{
    std::ostringstream joined;
    bool isFirst = true;
    for (auto const& item : list)
    {
        if (!isFirst)
        {
            joined << sep;
        }
        joined << item;
        isFirst = false;
    }
    return joined.str();
}
//! std::array overload: copies the elements into a vector and joins that.
template <typename T, size_t N>
std::string joinValuesToString(std::array<T, N> const& list, std::string const& sep)
{
    std::vector<T> const asVector(list.begin(), list.end());
    return joinValuesToString(asVector, sep);
}
| //! Check if input option exists in input arguments. | |
| //! If it does: set its value, and return true | |
| //! If it does not: return false. | |
| template <typename T> | |
| bool getOption(Arguments& arguments, const std::string& option, T& value) | |
| { | |
| auto const match = arguments.find(option); | |
| if (match != arguments.end()) | |
| { | |
| value = stringToValue<T>(match->second.first); | |
| return true; | |
| } | |
| return false; | |
| } | |
| //! Check if input option exists in input arguments. | |
| //! If it does: set its value, erase the argument and return true. | |
| //! If it does not: return false. | |
| template <typename T> | |
| bool getAndDelOption(Arguments& arguments, const std::string& option, T& value) | |
| { | |
| bool found = getOption(arguments, option, value); | |
| if (found) | |
| { | |
| const auto match = arguments.find(option); | |
| arguments.erase(match); | |
| } | |
| return found; | |
| } | |
| //! Check if input option exists in input arguments. | |
| //! If it does: set its value and position, erase the argument and return true. | |
| //! If it does not: return false. | |
| template <typename T> | |
| bool getAndDelOptionWithPosition(Arguments& arguments, std::string const& option, T& value, int32_t& pos) | |
| { | |
| auto const match = arguments.find(option); | |
| if (match != arguments.end()) | |
| { | |
| value = stringToValue<T>(match->second.first); | |
| pos = match->second.second; | |
| arguments.erase(match); | |
| return true; | |
| } | |
| return false; | |
| } | |
| //! Check if input option exists in input arguments behind the position spcecified by pos. | |
| //! If it does: set its value, erase the argument and return true. | |
| //! If it does not: return false. | |
| template <typename T> | |
| bool getAndDelOptionBehind(Arguments& arguments, std::string const& option, int32_t pos, T& value) | |
| { | |
| auto const match = arguments.equal_range(option); | |
| if (match.first == match.second) | |
| { | |
| return false; | |
| } | |
| for (auto i = match.first; i != match.second; ++i) | |
| { | |
| if (i->second.second - pos == 1) | |
| { | |
| value = stringToValue<T>(i->second.first); | |
| arguments.erase(i); | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
| //! Check if input option exists in input arguments. | |
| //! If it does: set false in value, erase the argument and return true. | |
| //! If it does not: return false. | |
| bool getAndDelNegOption(Arguments& arguments, const std::string& option, bool& value) | |
| { | |
| bool dummy; | |
| if (getAndDelOption(arguments, option, dummy)) | |
| { | |
| value = false; | |
| return true; | |
| } | |
| return false; | |
| } | |
| //! Check if input option exists in input arguments. | |
| //! If it does: add all the matched arg values to values vector, erase the argument and return true. | |
| //! If it does not: return false. | |
| template <typename T> | |
| bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, std::vector<T>& values) | |
| { | |
| const auto match = arguments.equal_range(option); | |
| if (match.first == match.second) | |
| { | |
| return false; | |
| } | |
| auto addToValues | |
| = [&values](Arguments::value_type& argValue) { values.emplace_back(stringToValue<T>(argValue.second.first)); }; | |
| std::for_each(match.first, match.second, addToValues); | |
| arguments.erase(match.first, match.second); | |
| return true; | |
| } | |
| void insertShapesBuild(BuildOptions::ShapeProfile& shapes, nvinfer1::OptProfileSelector selector, | |
| const std::string& name, const std::vector<int64_t>& dims) | |
| { | |
| shapes[name][static_cast<size_t>(selector)] = dims; | |
| } | |
| void insertShapesInference( | |
| InferenceOptions::ShapeProfile& shapes, std::string const& name, std::vector<int64_t> const& dims) | |
| { | |
| shapes[name] = dims; | |
| } | |
| std::string removeSingleQuotationMarks(std::string& str) | |
| { | |
| std::vector<std::string> strList{splitToStringVec(str, '\'')}; | |
| // Remove all the escaped single quotation marks | |
| std::string retVal; | |
| // Do not really care about unterminated sequences | |
| for (size_t i = 0; i < strList.size(); i++) | |
| { | |
| retVal += strList[i]; | |
| } | |
| return retVal; | |
| } | |
| void getLayerPrecisions(Arguments& arguments, char const* argument, LayerPrecisions& layerPrecisions) | |
| { | |
| std::string list; | |
| if (!getAndDelOption(arguments, argument, list)) | |
| { | |
| return; | |
| } | |
| // The layerPrecisions flag contains comma-separated layerName:precision pairs. | |
| std::vector<std::string> precisionList{splitToStringVec(list, ',')}; | |
| for (auto const& s : precisionList) | |
| { | |
| auto namePrecisionPair = splitNameAndValue<nvinfer1::DataType>(s); | |
| auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); | |
| layerPrecisions[layerName] = namePrecisionPair.second; | |
| } | |
| } | |
| void getLayerOutputTypes(Arguments& arguments, char const* argument, LayerOutputTypes& layerOutputTypes) | |
| { | |
| std::string list; | |
| if (!getAndDelOption(arguments, argument, list)) | |
| { | |
| return; | |
| } | |
| // The layerOutputTypes flag contains comma-separated layerName:types pairs. | |
| std::vector<std::string> precisionList{splitToStringVec(list, ',')}; | |
| for (auto const& s : precisionList) | |
| { | |
| auto namePrecisionPair = splitNameAndValue<std::string>(s); | |
| auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); | |
| auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+'); | |
| std::vector<nvinfer1::DataType> typeVec(typeStrings.size(), nvinfer1::DataType::kFLOAT); | |
| std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(), stringToValue<nvinfer1::DataType>); | |
| layerOutputTypes[layerName] = typeVec; | |
| } | |
| } | |
| void getLayerDeviceTypes(Arguments& arguments, char const* argument, LayerDeviceTypes& layerDeviceTypes) | |
| { | |
| std::string list; | |
| if (!getAndDelOption(arguments, argument, list)) | |
| { | |
| return; | |
| } | |
| // The layerDeviceTypes flag contains comma-separated layerName:deviceType pairs. | |
| std::vector<std::string> deviceList{splitToStringVec(list, ',')}; | |
| for (auto const& s : deviceList) | |
| { | |
| auto nameDevicePair = splitNameAndValue<std::string>(s); | |
| auto const layerName = removeSingleQuotationMarks(nameDevicePair.first); | |
| layerDeviceTypes[layerName] = stringToValue<nvinfer1::DeviceType>(nameDevicePair.second); | |
| } | |
| } | |
| void getAndDelStringsSet(Arguments& arguments, char const* argument, StringSet& stringSet) | |
| { | |
| std::string list; | |
| if (!getAndDelOption(arguments, argument, list)) | |
| { | |
| return; | |
| } | |
| // The layerPrecisions flag contains comma-separated layerName:precision pairs. | |
| std::vector<std::string> strings{splitToStringVec(list, ',')}; | |
| for (auto const& s : strings) | |
| { | |
| stringSet.insert(s); | |
| } | |
| } | |
| bool getShapesBuild(Arguments& arguments, BuildOptions::ShapeProfile& shapes, char const* argument, | |
| nvinfer1::OptProfileSelector selector) | |
| { | |
| std::string list; | |
| bool retVal = getAndDelOption(arguments, argument, list); | |
| std::vector<std::string> shapeList{splitToStringVec(list, ',')}; | |
| for (const auto& s : shapeList) | |
| { | |
| auto nameDimsPair = splitNameAndValue<std::vector<int64_t>>(s); | |
| auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); | |
| auto dims = nameDimsPair.second; | |
| insertShapesBuild(shapes, selector, tensorName, dims); | |
| } | |
| return retVal; | |
| } | |
| bool getShapesInference(Arguments& arguments, InferenceOptions::ShapeProfile& shapes, const char* argument) | |
| { | |
| std::string list; | |
| bool retVal = getAndDelOption(arguments, argument, list); | |
| std::vector<std::string> shapeList{splitToStringVec(list, ',')}; | |
| for (const auto& s : shapeList) | |
| { | |
| auto nameDimsPair = splitNameAndValue<std::vector<int64_t>>(s); | |
| auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); | |
| auto dims = nameDimsPair.second; | |
| insertShapesInference(shapes, tensorName, dims); | |
| } | |
| return retVal; | |
| } | |
| void fillShapes(BuildOptions::ShapeProfile& shapes, std::string const& name, ShapeRange const& sourceShapeRange, | |
| nvinfer1::OptProfileSelector minDimsSource, nvinfer1::OptProfileSelector optDimsSource, | |
| nvinfer1::OptProfileSelector maxDimsSource) | |
| { | |
| insertShapesBuild( | |
| shapes, nvinfer1::OptProfileSelector::kMIN, name, sourceShapeRange[static_cast<size_t>(minDimsSource)]); | |
| insertShapesBuild( | |
| shapes, nvinfer1::OptProfileSelector::kOPT, name, sourceShapeRange[static_cast<size_t>(optDimsSource)]); | |
| insertShapesBuild( | |
| shapes, nvinfer1::OptProfileSelector::kMAX, name, sourceShapeRange[static_cast<size_t>(maxDimsSource)]); | |
| } | |
| void processShapes(BuildOptions::ShapeProfile& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib) | |
| { | |
| // Only accept optShapes only or all three of minShapes, optShapes, maxShapes when calib is set | |
| if (((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes only, both minShapes and maxShapes | |
| || (minShapes && !maxShapes && optShapes) // both minShapes and optShapes | |
| || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes | |
| { | |
| if (calib) | |
| { | |
| throw std::invalid_argument( | |
| "Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib"); | |
| } | |
| } | |
| if (!minShapes && !optShapes && !maxShapes) | |
| { | |
| return; | |
| } | |
| BuildOptions::ShapeProfile newShapes; | |
| for (auto& s : shapes) | |
| { | |
| nvinfer1::OptProfileSelector minDimsSource, optDimsSource, maxDimsSource; | |
| minDimsSource = nvinfer1::OptProfileSelector::kMIN; | |
| optDimsSource = nvinfer1::OptProfileSelector::kOPT; | |
| maxDimsSource = nvinfer1::OptProfileSelector::kMAX; | |
| // Populate missing minShapes | |
| if (!minShapes) | |
| { | |
| if (optShapes) | |
| { | |
| minDimsSource = optDimsSource; | |
| sample::gLogWarning << "optShapes is being broadcasted to minShapes for tensor " << s.first | |
| << std::endl; | |
| } | |
| else | |
| { | |
| minDimsSource = maxDimsSource; | |
| sample::gLogWarning << "maxShapes is being broadcasted to minShapes for tensor " << s.first | |
| << std::endl; | |
| } | |
| } | |
| // Populate missing optShapes | |
| if (!optShapes) | |
| { | |
| if (maxShapes) | |
| { | |
| optDimsSource = maxDimsSource; | |
| sample::gLogWarning << "maxShapes is being broadcasted to optShapes for tensor " << s.first | |
| << std::endl; | |
| } | |
| else | |
| { | |
| optDimsSource = minDimsSource; | |
| sample::gLogWarning << "minShapes is being broadcasted to optShapes for tensor " << s.first | |
| << std::endl; | |
| } | |
| } | |
| // Populate missing maxShapes | |
| if (!maxShapes) | |
| { | |
| if (optShapes) | |
| { | |
| maxDimsSource = optDimsSource; | |
| sample::gLogWarning << "optShapes is being broadcasted to maxShapes for tensor " << s.first | |
| << std::endl; | |
| } | |
| else | |
| { | |
| maxDimsSource = minDimsSource; | |
| sample::gLogWarning << "minShapes is being broadcasted to maxShapes for tensor " << s.first | |
| << std::endl; | |
| } | |
| } | |
| fillShapes(newShapes, s.first, s.second, minDimsSource, optDimsSource, maxDimsSource); | |
| } | |
| shapes = newShapes; | |
| } | |
| bool getOptimizationProfiles( | |
| Arguments& arguments, std::vector<BuildOptions::ShapeProfile>& optProfiles, char const* argument) | |
| { | |
| bool retValue{false}; | |
| int32_t pos{}; | |
| size_t profileIndex{}; | |
| auto getShapes | |
| = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) { | |
| std::vector<std::string> shapeList{splitToStringVec(list, ',')}; | |
| for (auto const& s : shapeList) | |
| { | |
| auto nameDimsPair = splitNameAndValue<std::vector<int64_t>>(s); | |
| auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); | |
| auto dims = nameDimsPair.second; | |
| insertShapesBuild(shapes, selector, tensorName, dims); | |
| } | |
| }; | |
| while (getAndDelOptionWithPosition(arguments, argument, profileIndex, pos)) | |
| { | |
| BuildOptions::ShapeProfile optProfile{}; | |
| bool minShapes{false}, maxShapes{false}, optShapes{false}; | |
| for (int32_t i = 0; i < nvinfer1::EnumMax<nvinfer1::OptProfileSelector>(); i++, pos++) | |
| { | |
| std::string value; | |
| if (!minShapes && getAndDelOptionBehind(arguments, "--minShapes", pos, value)) | |
| { | |
| minShapes = true; | |
| getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMIN); | |
| } | |
| else if (!maxShapes && getAndDelOptionBehind(arguments, "--maxShapes", pos, value)) | |
| { | |
| maxShapes = true; | |
| getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMAX); | |
| } | |
| else if (!optShapes && getAndDelOptionBehind(arguments, "--optShapes", pos, value)) | |
| { | |
| optShapes = true; | |
| getShapes(optProfile, value, nvinfer1::OptProfileSelector::kOPT); | |
| } | |
| else | |
| { | |
| break; | |
| } | |
| } | |
| processShapes(optProfile, minShapes, optShapes, maxShapes, false); | |
| if (profileIndex >= optProfiles.size()) | |
| { | |
| optProfiles.resize(profileIndex + 1); | |
| } | |
| if (!optProfiles[profileIndex].empty()) | |
| { | |
| throw std::invalid_argument("Optimization profile index cannot be the same."); | |
| } | |
| optProfiles[profileIndex] = optProfile; | |
| retValue = true; | |
| } | |
| profileIndex = 0; | |
| for (auto const& optProfile : optProfiles) | |
| { | |
| if (optProfile.empty()) | |
| { | |
| throw std::invalid_argument(std::string("Found invalid or missing shape spec at profile index ") | |
| + std::to_string(profileIndex) + std::string(". ")); | |
| } | |
| ++profileIndex; | |
| } | |
| return retValue; | |
| } | |
//! Prints one "Input <phase> shape" line per entry of \p shapes, or a single line saying
//! the model's own shapes are used when \p shapes is empty. For the "build" phase with a
//! \p profileIndex other than -1, each line is tagged with the profile index.
template <typename T>
void printShapes(std::ostream& os, char const* phase, T const& shapes, int32_t profileIndex)
{
    if (shapes.empty())
    {
        os << "Input " << phase << " shapes: model" << std::endl;
        return;
    }

    std::string profileTag;
    bool const tagWithProfile = profileIndex != -1 && strcmp(phase, "build") == 0;
    if (tagWithProfile)
    {
        profileTag = "(profile " + std::to_string(profileIndex) + ")";
    }

    for (auto const& entry : shapes)
    {
        os << "Input " << phase << " shape " << profileTag << ": " << entry.first << "=" << entry.second
           << std::endl;
    }
}
| std::ostream& printTacticSources( | |
| std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources) | |
| { | |
| if (!enabledSources && !disabledSources) | |
| { | |
| os << "Using default tactic sources"; | |
| } | |
| else | |
| { | |
| auto const addSource = [&](uint32_t source, std::string const& name) | |
| { | |
| if (enabledSources & source) | |
| { | |
| os << name << " [ON], "; | |
| } | |
| else if (disabledSources & source) | |
| { | |
| os << name << " [OFF], "; | |
| } | |
| }; | |
| addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS), "cublas"); | |
| addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS_LT), "cublasLt"); | |
| addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUDNN), "cudnn"); | |
| addSource( | |
| 1U << static_cast<uint32_t>(nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS), "edge mask convolutions"); | |
| addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kJIT_CONVOLUTIONS), "JIT convolutions"); | |
| } | |
| return os; | |
| } | |
| std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) | |
| { | |
| if (options.stronglyTyped) | |
| { | |
| os << "Strongly Typed"; | |
| return os; | |
| } | |
| os << "FP32"; | |
| if (options.fp16) | |
| { | |
| os << "+FP16"; | |
| } | |
| if (options.bf16) | |
| { | |
| os << "+BF16"; | |
| } | |
| if (options.int8) | |
| { | |
| os << "+INT8"; | |
| } | |
| if (options.fp8) | |
| { | |
| os << "+FP8"; | |
| } | |
| if (options.int4) | |
| { | |
| os << "+INT4"; | |
| } | |
| if (options.precisionConstraints == PrecisionConstraints::kOBEY) | |
| { | |
| os << " (obey precision constraints)"; | |
| } | |
| if (options.precisionConstraints == PrecisionConstraints::kPREFER) | |
| { | |
| os << " (prefer precision constraints)"; | |
| } | |
| return os; | |
| } | |
| std::ostream& printTempfileControls(std::ostream& os, TempfileControlFlags const tempfileControls) | |
| { | |
| auto getFlag = [&](TempfileControlFlag f) -> char const* | |
| { | |
| bool allowed = !!(tempfileControls & (1U << static_cast<int64_t>(f))); | |
| return allowed ? "allow" : "deny"; | |
| }; | |
| auto const inMemory = getFlag(TempfileControlFlag::kALLOW_IN_MEMORY_FILES); | |
| auto const temporary = getFlag(TempfileControlFlag::kALLOW_TEMPORARY_FILES); | |
| os << "{ in_memory: " << inMemory << ", temporary: " << temporary << " }"; | |
| return os; | |
| } | |
| std::ostream& printTimingCache(std::ostream& os, TimingCacheMode const& timingCacheMode) | |
| { | |
| switch (timingCacheMode) | |
| { | |
| case TimingCacheMode::kGLOBAL: os << "global"; break; | |
| case TimingCacheMode::kLOCAL: os << "local"; break; | |
| case TimingCacheMode::kDISABLE: os << "disable"; break; | |
| } | |
| return os; | |
| } | |
| std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) | |
| { | |
| switch (options.sparsity) | |
| { | |
| case SparsityFlag::kDISABLE: os << "Disabled"; break; | |
| case SparsityFlag::kENABLE: os << "Enabled"; break; | |
| case SparsityFlag::kFORCE: os << "Forced"; break; | |
| } | |
| return os; | |
| } | |
| std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) | |
| { | |
| auto const printValueOrDefault = [&os](double const val, char const* unit = "MiB") | |
| { | |
| if (val >= 0) | |
| { | |
| os << val << " " << unit; | |
| } | |
| else | |
| { | |
| os << "default"; | |
| } | |
| }; | |
| os << "workspace: "; | |
| printValueOrDefault(options.workspace); | |
| os << ", "; | |
| os << "dlaSRAM: "; | |
| printValueOrDefault(options.dlaSRAM); | |
| os << ", "; | |
| os << "dlaLocalDRAM: "; | |
| printValueOrDefault(options.dlaLocalDRAM); | |
| os << ", "; | |
| os << "dlaGlobalDRAM: "; | |
| printValueOrDefault(options.dlaGlobalDRAM); | |
| os << ", "; | |
| os << "tacticSharedMem: "; | |
| printValueOrDefault(options.tacticSharedMem, "KiB"); | |
| return os; | |
| } | |
| std::string previewFeatureToString(PreviewFeature feature) | |
| { | |
| // clang-format off | |
| switch (feature) | |
| { | |
| case PreviewFeature::kPROFILE_SHARING_0806: | |
| { | |
| gLogWarning << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." << std::endl; | |
| break; | |
| } | |
| case PreviewFeature::kALIASED_PLUGIN_IO_10_03: return "kALIASED_PLUGIN_IO_10_03"; | |
| case PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10: return "kRUNTIME_ACTIVATION_RESIZE_10_10"; | |
| } | |
| return "Invalid Preview Feature"; | |
| // clang-format on | |
| } | |
| std::ostream& printPreviewFlags(std::ostream& os, BuildOptions const& options) | |
| { | |
| if (options.previewFeatures.empty()) | |
| { | |
| os << "Use default preview flags."; | |
| return os; | |
| } | |
| auto const addFlag = [&](PreviewFeature feat) { | |
| int32_t featVal = static_cast<int32_t>(feat); | |
| if (options.previewFeatures.find(featVal) != options.previewFeatures.end()) | |
| { | |
| os << previewFeatureToString(feat) << (options.previewFeatures.at(featVal) ? " [ON], " : " [OFF], "); | |
| } | |
| }; | |
| addFlag(PreviewFeature::kALIASED_PLUGIN_IO_10_03); | |
| addFlag(PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10); | |
| return os; | |
| } | |
| } // namespace | |
| Arguments argsToArgumentsMap(int32_t argc, char* argv[]) | |
| { | |
| Arguments arguments; | |
| for (int32_t i = 1; i < argc; ++i) | |
| { | |
| auto valuePtr = strchr(argv[i], '='); | |
| if (valuePtr) | |
| { | |
| std::string value{valuePtr + 1}; | |
| arguments.emplace(std::string(argv[i], valuePtr - argv[i]), std::make_pair(value, i)); | |
| } | |
| else | |
| { | |
| arguments.emplace(argv[i], std::make_pair(std::string(""), i)); | |
| } | |
| } | |
| return arguments; | |
| } | |
namespace
{
// Expands a leading "~" in `model` to the value of the HOME environment variable.
//
// Only the forms that refer to the current user's home directory are expanded: a bare "~" or a
// path starting with "~/". Anything else (including "~name/..." and the empty string) is
// returned unchanged, because substituting HOME there would produce an unrelated path.
// If HOME is not set, the path is returned unchanged as well.
std::string resolveHomeDirectoryOnLinux(std::string const& model)
{
    bool const startsWithTilde = !model.empty() && model[0] == '~';
    bool const refersToOwnHome = startsWithTilde && (model.size() == 1 || model[1] == '/');
    if (!refersToOwnHome)
    {
        return model;
    }

    char const* home = std::getenv("HOME");
    if (home == nullptr)
    {
        return model;
    }

    // Drop the '~' and keep the remainder (either empty or starting with '/').
    return home + model.substr(1);
}
} // namespace
| void BaseModelOptions::parse(Arguments& arguments) | |
| { | |
| if (getAndDelOption(arguments, "--onnx", model)) | |
| { | |
| format = ModelFormat::kONNX; | |
| model = resolveHomeDirectoryOnLinux(model); | |
| } | |
| } | |
| void ModelOptions::parse(Arguments& arguments) | |
| { | |
| baseModel.parse(arguments); | |
| switch (baseModel.format) | |
| { | |
| case ModelFormat::kONNX: | |
| case ModelFormat::kANY: | |
| { | |
| break; | |
| } | |
| } | |
| if (baseModel.format == ModelFormat::kONNX) | |
| { | |
| if (!outputs.empty()) | |
| { | |
| throw std::invalid_argument("The --output flag should not be used with ONNX models."); | |
| } | |
| } | |
| } | |
| void getTempfileControls(Arguments& arguments, char const* argument, TempfileControlFlags& tempfileControls) | |
| { | |
| std::string list; | |
| if (!getAndDelOption(arguments, argument, list)) | |
| { | |
| return; | |
| } | |
| std::vector<std::string> controlList{splitToStringVec(list, ',')}; | |
| for (auto const& s : controlList) | |
| { | |
| auto controlAllowPair = splitNameAndValue<std::string>(s); | |
| bool allowed{false}; | |
| int32_t offset{-1}; | |
| if (controlAllowPair.second.compare("allow") == 0) | |
| { | |
| allowed = true; | |
| } | |
| else if (controlAllowPair.second.compare("deny") != 0) | |
| { | |
| throw std::invalid_argument("--tempfileControls value should be `deny` or `allow`"); | |
| } | |
| if (controlAllowPair.first.compare("in_memory") == 0) | |
| { | |
| offset = static_cast<int32_t>(TempfileControlFlag::kALLOW_IN_MEMORY_FILES); | |
| } | |
| else if (controlAllowPair.first.compare("temporary") == 0) | |
| { | |
| offset = static_cast<int32_t>(TempfileControlFlag::kALLOW_TEMPORARY_FILES); | |
| } | |
| else | |
| { | |
| throw std::invalid_argument(std::string{"Unknown --tempfileControls key "} + controlAllowPair.first); | |
| } | |
| if (allowed) | |
| { | |
| tempfileControls |= (1U << offset); | |
| } | |
| else | |
| { | |
| tempfileControls &= ~(1U << offset); | |
| } | |
| } | |
| } | |
| void BuildOptions::parse(Arguments& arguments) | |
| { | |
| auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector, const char* argument) | |
| { | |
| std::string list; | |
| getAndDelOption(arguments, argument, list); | |
| std::vector<std::string> formats{splitToStringVec(list, ',')}; | |
| for (const auto& f : formats) | |
| { | |
| formatsVector.push_back(stringToValue<IOFormat>(f)); | |
| } | |
| }; | |
| getFormats(inputFormats, "--inputIOFormats"); | |
| getFormats(outputFormats, "--outputIOFormats"); | |
| bool getCalibProfile = getAndDelOption(arguments, "--calibProfile", calibProfile); | |
| if (!getOptimizationProfiles(arguments, optProfiles, "--profile")) | |
| { | |
| ShapeProfile shapes; | |
| bool minShapes{false}, optShapes{false}, maxShapes{false}; | |
| try | |
| { | |
| minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN); | |
| optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT); | |
| maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX); | |
| } | |
| catch (std::invalid_argument const& arg) | |
| { | |
| throw std::invalid_argument(arg.what() | |
| + std::string(" conversion failure: failed to parse minShapes/optShapes/maxShapes. Please double check " | |
| "your input string.")); | |
| } | |
| processShapes(shapes, minShapes, optShapes, maxShapes, false); | |
| optProfiles.emplace_back(shapes); | |
| } | |
| if (calibProfile >= optProfiles.size()) | |
| { | |
| throw std::invalid_argument( | |
| std::string("--calibProfile shouldn't greater than the size of optimization profile.")); | |
| } | |
| BuildOptions::ShapeProfile dummyShapes; | |
| bool remainingMinShapes = getShapesBuild(arguments, dummyShapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN); | |
| bool remainingOptShapes = getShapesBuild(arguments, dummyShapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT); | |
| bool remainingMaxShapes = getShapesBuild(arguments, dummyShapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX); | |
| if (remainingMinShapes || remainingOptShapes || remainingMaxShapes) | |
| { | |
| throw std::invalid_argument("Multiple --minShapes/--optShapes/--maxShapes without --profile are not allowed. "); | |
| } | |
| bool minShapesCalib{false}, optShapesCalib{false}, maxShapesCalib{false}; | |
| try | |
| { | |
| minShapesCalib = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN); | |
| optShapesCalib = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT); | |
| maxShapesCalib = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX); | |
| } | |
| catch (std::invalid_argument const& arg) | |
| { | |
| throw std::invalid_argument(arg.what() | |
| + std::string(" conversion failure: failed to parse minShapesCalib/optShapesCalib/maxShapesCalib. Please " | |
| "double check your input string.")); | |
| } | |
| processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true); | |
| std::string memPoolSizes; | |
| getAndDelOption(arguments, "--memPoolSize", memPoolSizes); | |
| std::vector<std::string> memPoolSpecs{splitToStringVec(memPoolSizes, ',')}; | |
| for (auto const& memPoolSpec : memPoolSpecs) | |
| { | |
| std::string memPoolName; | |
| double memPoolSize; | |
| try | |
| { | |
| std::string strPoolSize; | |
| std::tie(memPoolName, strPoolSize) = splitNameAndValue<std::string>(memPoolSpec); | |
| memPoolSize = stringToValue<double>(addDefaultUnitSuffixIfNotSpecified(strPoolSize, 'M')); | |
| } | |
| catch (std::invalid_argument const& arg) | |
| { | |
| throw std::invalid_argument(arg.what() | |
| + std::string( | |
| " conversion failure: failed to parse --memPoolSize. Please double check your input string.")); | |
| } | |
| if (memPoolSize < 0) | |
| { | |
| throw std::invalid_argument(std::string("Negative memory pool size: ") + std::to_string(memPoolSize)); | |
| } | |
| if (memPoolName == "workspace") | |
| { | |
| // use unit in MB. | |
| workspace = memPoolSize / 1.0_MiB; | |
| } | |
| else if (memPoolName == "dlaSRAM") | |
| { | |
| // use unit in MB. | |
| dlaSRAM = memPoolSize / 1.0_MiB; | |
| } | |
| else if (memPoolName == "dlaLocalDRAM") | |
| { | |
| // use unit in MB. | |
| dlaLocalDRAM = memPoolSize / 1.0_MiB; | |
| } | |
| else if (memPoolName == "dlaGlobalDRAM") | |
| { | |
| // use unit in MB. | |
| dlaGlobalDRAM = memPoolSize / 1.0_MiB; | |
| } | |
| else if (memPoolName == "tacticSharedMem") | |
| { | |
| // use unit in KB. | |
| tacticSharedMem = memPoolSize / 1.0_KiB; | |
| } | |
| else if (!memPoolName.empty()) | |
| { | |
| throw std::invalid_argument(std::string("Unknown memory pool: ") + memPoolName); | |
| } | |
| } | |
| getAndDelOption(arguments, "--avgTiming", avgTiming); | |
| bool best{false}; | |
| getAndDelOption(arguments, "--best", best); | |
| if (best) | |
| { | |
| int8 = (samplesCommon::getSMVersion() != 0x0a03); | |
| fp16 = true; | |
| // BF16 only supported on Ampere+ | |
| if (samplesCommon::getSMVersion() >= 0x0800) | |
| { | |
| bf16 = true; | |
| } | |
| } | |
| getAndDelOption(arguments, "--refit", refittable); | |
| getAndDelOption(arguments, "--weightless", stripWeights); | |
| getAndDelOption(arguments, "--stripWeights", stripWeights); | |
| bool stripAllWeights{}; | |
| getAndDelOption(arguments, "--stripAllWeights", stripAllWeights); | |
| if (stripAllWeights) | |
| { | |
| refittable = true; | |
| stripWeights = true; | |
| } | |
| // --vc and --versionCompatible are synonyms | |
| getAndDelOption(arguments, "--vc", versionCompatible); | |
| if (!versionCompatible) | |
| { | |
| getAndDelOption(arguments, "--versionCompatible", versionCompatible); | |
| } | |
| // --pi and --pluginInstanceNorm are synonyms | |
| getAndDelOption(arguments, "--pi", pluginInstanceNorm); | |
| if (!pluginInstanceNorm) | |
| { | |
| getAndDelOption(arguments, "--pluginInstanceNorm", pluginInstanceNorm); | |
| } | |
| getAndDelOption(arguments, "--uint8AsymmetricQuantizationDLA", enableUInt8AsymmetricQuantizationDLA); | |
| getAndDelOption(arguments, "--excludeLeanRuntime", excludeLeanRuntime); | |
| getAndDelOption(arguments, "--noCompilationCache", disableCompilationCache); | |
| getAndDelOption(arguments, "--monitorMemory", enableMonitorMemory); | |
| getAndDelNegOption(arguments, "--noTF32", tf32); | |
| getAndDelOption(arguments, "--fp16", fp16); | |
| getAndDelOption(arguments, "--bf16", bf16); | |
| getAndDelOption(arguments, "--int8", int8); | |
| getAndDelOption(arguments, "--fp8", fp8); | |
| getAndDelOption(arguments, "--int4", int4); | |
| getAndDelOption(arguments, "--stronglyTyped", stronglyTyped); | |
| getAndDelOption(arguments, "--distributiveIndependence", distributiveIndependence); | |
| if (best && stronglyTyped) | |
| { | |
| throw std::invalid_argument( | |
| "--best and --stronglyTyped cannot be both set. --best enables implicit precisions, while " | |
| "--stronglyTyped enforces explicit precisions."); | |
| } | |
| if (stronglyTyped) | |
| { | |
| auto disableAndLog = [](bool& flag, std::string mode, std::string type) | |
| { | |
| if (flag) | |
| { | |
| flag = false; | |
| sample::gLogWarning << "Invalid usage, setting " << mode | |
| << " mode is not allowed if graph is strongly typed. Disabling BuilderFlag::" | |
| << type << "." << std::endl; | |
| } | |
| }; | |
| disableAndLog(fp16, "fp16", "kFP16"); | |
| disableAndLog(int8, "int8", "kINT8"); | |
| disableAndLog(bf16, "bf16", "kBF16"); | |
| disableAndLog(fp8, "fp8", "kFP8"); | |
| disableAndLog(int4, "int4", "kINT4"); | |
| } | |
| // Print a message to tell users that --noTF32 can be added to improve accuracy with performance cost. | |
| if (samplesCommon::getSMVersion() >= 0x0800) | |
| { | |
| if (!(stronglyTyped || fp16 || bf16 || int8 || fp8 || int4)) | |
| { | |
| sample::gLogInfo << "TF32 is enabled by default. Add --noTF32 flag to further improve accuracy with some " | |
| << "performance cost." << std::endl; | |
| } | |
| } | |
| if (fp8 && int8) | |
| { | |
| throw std::invalid_argument("Invalid usage, fp8 and int8 aren't allowed to be enabled together."); | |
| } | |
| getAndDelOption(arguments, "--safe", safe); | |
| getAndDelOption(arguments, "--buildDLAStandalone", buildDLAStandalone); | |
| getAndDelOption(arguments, "--allowGPUFallback", allowGPUFallback); | |
| getAndDelOption(arguments, "--consistency", consistency); | |
| getAndDelOption(arguments, "--restricted", restricted); | |
| getAndDelOption(arguments, "--skipInference", skipInference); | |
| if (getAndDelOption(arguments, "--directIO", directIO)) | |
| { | |
| sample::gLogWarning << "--directIO flag has been deprecated" << std::endl; | |
| } | |
| std::string precisionConstraintsString; | |
| getAndDelOption(arguments, "--precisionConstraints", precisionConstraintsString); | |
| if (!precisionConstraintsString.empty()) | |
| { | |
| const std::unordered_map<std::string, PrecisionConstraints> precisionConstraintsMap | |
| = {{"obey", PrecisionConstraints::kOBEY}, {"prefer", PrecisionConstraints::kPREFER}, | |
| {"none", PrecisionConstraints::kNONE}}; | |
| auto it = precisionConstraintsMap.find(precisionConstraintsString); | |
| if (it == precisionConstraintsMap.end()) | |
| { | |
| throw std::invalid_argument(std::string("Unknown precision constraints: ") + precisionConstraintsString); | |
| } | |
| precisionConstraints = it->second; | |
| } | |
| else | |
| { | |
| precisionConstraints = PrecisionConstraints::kNONE; | |
| } | |
| getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions); | |
| getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes); | |
| getLayerDeviceTypes(arguments, "--layerDeviceTypes", layerDeviceTypes); | |
| if (layerPrecisions.empty() && layerOutputTypes.empty() && precisionConstraints != PrecisionConstraints::kNONE) | |
| { | |
| sample::gLogWarning << R"(When --precisionConstraints flag is set to "obey" or "prefer", please add )" | |
| << "--layerPrecision/--layerOutputTypes flags to set layer-wise precisions and output " | |
| << "types." << std::endl; | |
| } | |
| else if ((!layerPrecisions.empty() || !layerOutputTypes.empty()) | |
| && precisionConstraints == PrecisionConstraints::kNONE) | |
| { | |
| sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no effect when --precisionConstraints " | |
| << R"(flag is set to "none".)" << std::endl; | |
| } | |
| getAndDelStringsSet(arguments, "--markDebug", debugTensors); | |
| getAndDelOption(arguments, "--markUnfusedTensorsAsDebugTensors", markUnfusedTensorsAsDebugTensors); | |
| getAndDelOption(arguments, "--sparsity", sparsity); | |
| bool calibCheck = getAndDelOption(arguments, "--calib", calibration); | |
| if (int8 && calibCheck && !optProfiles[calibProfile].empty() && shapesCalib.empty()) | |
| { | |
| shapesCalib = optProfiles[calibProfile]; | |
| } | |
| else if (!shapesCalib.empty() && getCalibProfile) | |
| { | |
| sample::gLogWarning | |
| << "--calibProfile have no effect when --minShapesCalib/--optShapesCalib/--maxShapesCalib is set." | |
| << std::endl; | |
| } | |
| std::string profilingVerbosityString; | |
| getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString); | |
| if (profilingVerbosityString == "layer_names_only") | |
| { | |
| profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; | |
| } | |
| else if (profilingVerbosityString == "none") | |
| { | |
| profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE; | |
| } | |
| else if (profilingVerbosityString == "detailed") | |
| { | |
| profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; | |
| } | |
| else if (profilingVerbosityString == "default") | |
| { | |
| sample::gLogWarning << "--profilingVerbosity=default has been deprecated by " | |
| "--profilingVerbosity=layer_names_only." | |
| << std::endl; | |
| profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; | |
| } | |
| else if (profilingVerbosityString == "verbose") | |
| { | |
| sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated by --profilingVerbosity=detailed." | |
| << std::endl; | |
| profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; | |
| } | |
| else if (!profilingVerbosityString.empty()) | |
| { | |
| throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") + profilingVerbosityString); | |
| } | |
| if (getAndDelOption(arguments, "--loadEngine", engine)) | |
| { | |
| load = true; | |
| } | |
| getAndDelOption(arguments, "--asyncFileReader", asyncFileReader); | |
| getAndDelOption(arguments, "--getPlanVersionOnly", getPlanVersionOnly); | |
| if (getAndDelOption(arguments, "--saveEngine", engine)) | |
| { | |
| save = true; | |
| } | |
| if (load && save) | |
| { | |
| throw std::invalid_argument("Incompatible load and save engine options selected"); | |
| } | |
| std::string tacticSourceArgs; | |
| if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs)) | |
| { | |
| std::vector<std::string> tacticList = splitToStringVec(tacticSourceArgs, ','); | |
| for (auto& t : tacticList) | |
| { | |
| bool enable{false}; | |
| if (t.front() == '+') | |
| { | |
| enable = true; | |
| } | |
| else if (t.front() != '-') | |
| { | |
| throw std::invalid_argument( | |
| "Tactic source must be prefixed with + or -, indicating whether it should be enabled or disabled " | |
| "respectively."); | |
| } | |
| t.erase(0, 1); | |
| const auto toUpper = [](std::string& sourceName) | |
| { | |
| std::transform( | |
| sourceName.begin(), sourceName.end(), sourceName.begin(), [](char c) { return std::toupper(c); }); | |
| return sourceName; | |
| }; | |
| nvinfer1::TacticSource source{}; | |
| t = toUpper(t); | |
| if (t == "CUBLAS") | |
| { | |
| source = nvinfer1::TacticSource::kCUBLAS; | |
| } | |
| else if (t == "CUBLASLT" || t == "CUBLAS_LT") | |
| { | |
| source = nvinfer1::TacticSource::kCUBLAS_LT; | |
| } | |
| else | |
| if (t == "CUDNN") | |
| { | |
| source = nvinfer1::TacticSource::kCUDNN; | |
| } | |
| else if (t == "EDGE_MASK_CONVOLUTIONS") | |
| { | |
| source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; | |
| } | |
| else if (t == "JIT_CONVOLUTIONS") | |
| { | |
| source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; | |
| } | |
| else | |
| { | |
| throw std::invalid_argument(std::string("Unknown tactic source: ") + t); | |
| } | |
| uint32_t sourceBit = 1U << static_cast<uint32_t>(source); | |
| if (enable) | |
| { | |
| enabledTactics |= sourceBit; | |
| } | |
| else | |
| { | |
| disabledTactics |= sourceBit; | |
| } | |
| if (enabledTactics & disabledTactics) | |
| { | |
| throw std::invalid_argument(std::string("Cannot enable and disable ") + t); | |
| } | |
| } | |
| } | |
| bool noBuilderCache{false}; | |
| getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); | |
| getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); | |
| if (noBuilderCache) | |
| { | |
| timingCacheMode = TimingCacheMode::kDISABLE; | |
| } | |
| else if (!timingCacheFile.empty()) | |
| { | |
| timingCacheMode = TimingCacheMode::kGLOBAL; | |
| } | |
| else | |
| { | |
| timingCacheMode = TimingCacheMode::kLOCAL; | |
| } | |
| getAndDelOption(arguments, "--errorOnTimingCacheMiss", errorOnTimingCacheMiss); | |
| getAndDelOption(arguments, "--builderOptimizationLevel", builderOptimizationLevel); | |
| getAndDelOption(arguments, "--maxTactics", maxTactics); | |
| std::string runtimePlatformArgs; | |
| getAndDelOption(arguments, "--runtimePlatform", runtimePlatformArgs); | |
| if (runtimePlatformArgs == "SameAsBuild" || runtimePlatformArgs.empty()) | |
| { | |
| runtimePlatform = RuntimePlatform::kSAME_AS_BUILD; | |
| } | |
| else if (runtimePlatformArgs == "WindowsAMD64") | |
| { | |
| runtimePlatform = RuntimePlatform::kWINDOWS_AMD64; | |
| } | |
| else | |
| { | |
| throw std::invalid_argument(std::string("Unknown runtime platform: ") + runtimePlatformArgs | |
| + ". Valid options: SameAsBuild, WindowsAMD64."); | |
| } | |
| std::string hardwareCompatibleArgs; | |
| getAndDelOption(arguments, "--hardwareCompatibilityLevel", hardwareCompatibleArgs); | |
| if (hardwareCompatibleArgs == "none" || hardwareCompatibleArgs.empty()) | |
| { | |
| hardwareCompatibilityLevel = HardwareCompatibilityLevel::kNONE; | |
| } | |
| else if (samplesCommon::toLower(hardwareCompatibleArgs) == "ampere+") | |
| { | |
| hardwareCompatibilityLevel = HardwareCompatibilityLevel::kAMPERE_PLUS; | |
| } | |
| else if (samplesCommon::toLower(hardwareCompatibleArgs) == "samecomputecapability") | |
| { | |
| hardwareCompatibilityLevel = HardwareCompatibilityLevel::kSAME_COMPUTE_CAPABILITY; | |
| } | |
| else | |
| { | |
| throw std::invalid_argument(std::string("Unknown hardwareCompatibilityLevel: ") + hardwareCompatibleArgs | |
| + ". Valid options: none, ampere+, sameComputeCapability."); | |
| } | |
| if (pluginInstanceNorm | |
| && (versionCompatible || hardwareCompatibilityLevel == HardwareCompatibilityLevel::kAMPERE_PLUS)) | |
| { | |
| throw std::invalid_argument( | |
| "Plugin InstanceNorm cannot be used with version compatible or hardware compatible engines!"); | |
| } | |
| getAndDelOption(arguments, "--maxAuxStreams", maxAuxStreams); | |
| std::string previewFeaturesBuf; | |
| getAndDelOption(arguments, "--preview", previewFeaturesBuf); | |
| std::vector<std::string> previewFeaturesVec{splitToStringVec(previewFeaturesBuf, ',')}; | |
| for (auto featureName : previewFeaturesVec) | |
| { | |
| bool enable{false}; | |
| if (featureName.front() == '+') | |
| { | |
| enable = true; | |
| } | |
| else if (featureName.front() != '-') | |
| { | |
| throw std::invalid_argument( | |
| "Preview features must be prefixed with + or -, indicating whether it should be enabled or disabled " | |
| "respectively."); | |
| } | |
| featureName.erase(0, 1); | |
| PreviewFeature feat{}; | |
| if (featureName == "profileSharing0806") | |
| { | |
| sample::gLogWarning | |
| << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." | |
| << std::endl; | |
| } | |
| else if (featureName == "aliasedPluginIO1003") | |
| { | |
| feat = PreviewFeature::kALIASED_PLUGIN_IO_10_03; | |
| } | |
| else if (featureName == "runtimeActivationResize") | |
| { | |
| feat = PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10; | |
| } | |
| else | |
| { | |
| throw std::invalid_argument(std::string("Unknown preview feature: ") + featureName); | |
| } | |
| previewFeatures[static_cast<int32_t>(feat)] = enable; | |
| } | |
| getAndDelOption(arguments, "--tempdir", tempdir); | |
| getTempfileControls(arguments, "--tempfileControls", tempfileControls); | |
| std::string runtimeMode; | |
| getAndDelOption(arguments, "--useRuntime", runtimeMode); | |
| if (runtimeMode == "full") | |
| { | |
| useRuntime = RuntimeMode::kFULL; | |
| } | |
| else if (runtimeMode == "dispatch") | |
| { | |
| useRuntime = RuntimeMode::kDISPATCH; | |
| } | |
| else if (runtimeMode == "lean") | |
| { | |
| useRuntime = RuntimeMode::kLEAN; | |
| } | |
| else if (!runtimeMode.empty()) | |
| { | |
| throw std::invalid_argument(std::string("Unknown useRuntime: ") + runtimeMode); | |
| } | |
| if ((useRuntime == RuntimeMode::kDISPATCH || useRuntime == RuntimeMode::kLEAN) && !versionCompatible) | |
| { | |
| versionCompatible = true; | |
| sample::gLogWarning << "Implicitly enabling --versionCompatible since --useRuntime=" << runtimeMode | |
| << " is set." << std::endl; | |
| } | |
| if (useRuntime != RuntimeMode::kFULL && !load) | |
| { | |
| throw std::invalid_argument(std::string("Building a TensorRT engine requires --useRuntime=full.")); | |
| } | |
| getAndDelOption(arguments, "--leanDLLPath", leanDLLPath); | |
| // Don't delete the option because the inference option parser requires it | |
| getOption(arguments, "--allowWeightStreaming", allowWeightStreaming); | |
| getAndDelOption(arguments, "--tilingOptimizationLevel", tilingOptimizationLevel); | |
| getAndDelOption(arguments, "--l2LimitForTiling", l2LimitForTiling); | |
| getAndDelOption(arguments, "--remoteAutoTuningConfig", remoteAutoTuningConfig); | |
| if (!remoteAutoTuningConfig.empty() && !safe) | |
| { | |
| throw std::invalid_argument( | |
| "Remote auto tuning is not supported in standard build. Use --safe flag to enable it."); | |
| } | |
| } | |
| void SystemOptions::parse(Arguments& arguments) | |
| { | |
| getAndDelOption(arguments, "--device", device); | |
| getAndDelOption(arguments, "--useDLACore", DLACore); | |
| std::string pluginName; | |
| while (getAndDelOption(arguments, "--plugins", pluginName)) | |
| { | |
| sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." << std::endl; | |
| plugins.emplace_back(pluginName); | |
| } | |
| while (getAndDelOption(arguments, "--staticPlugins", pluginName)) | |
| { | |
| plugins.emplace_back(pluginName); | |
| } | |
| while (getAndDelOption(arguments, "--setPluginsToSerialize", pluginName)) | |
| { | |
| setPluginsToSerialize.emplace_back(pluginName); | |
| } | |
| while (getAndDelOption(arguments, "--dynamicPlugins", pluginName)) | |
| { | |
| dynamicPlugins.emplace_back(pluginName); | |
| } | |
| samplesSafeCommon::SafetyPluginLibraryArgument safetyPluginOption; | |
| while (getAndDelOption(arguments, "--safetyPlugins", safetyPluginOption)) | |
| { | |
| safetyPlugins.emplace_back(std::move(safetyPluginOption)); | |
| } | |
| getAndDelOption(arguments, "--ignoreParsedPluginLibs", ignoreParsedPluginLibs); | |
| } | |
// Namespace-scope definitions of the static constexpr members of WeightStreamingBudget.
// NOTE(review): presumably kept so the constants can be odr-used when building with a
// pre-C++17 standard (where constexpr static members are not implicitly inline) - confirm.
| constexpr int64_t WeightStreamingBudget::kDISABLE; | |
| constexpr int64_t WeightStreamingBudget::kAUTOMATIC; | |
| void InferenceOptions::parse(Arguments& arguments) | |
| { | |
| if (getAndDelOption(arguments, "--streams", infStreams)) | |
| { | |
| sample::gLogWarning << "--streams flag has been deprecated, use --infStreams flag instead." << std::endl; | |
| } | |
| getAndDelOption(arguments, "--infStreams", infStreams); | |
| getAndDelOption(arguments, "--iterations", iterations); | |
| getAndDelOption(arguments, "--duration", duration); | |
| getAndDelOption(arguments, "--warmUp", warmup); | |
| getAndDelOption(arguments, "--sleepTime", sleep); | |
| getAndDelOption(arguments, "--idleTime", idle); | |
| bool exposeDMA{false}; | |
| if (getAndDelOption(arguments, "--exposeDMA", exposeDMA)) | |
| { | |
| overlap = !exposeDMA; | |
| } | |
| getAndDelOption(arguments, "--noDataTransfers", skipTransfers); | |
| getAndDelOption(arguments, "--useManagedMemory", useManaged); | |
| getAndDelOption(arguments, "--useSpinWait", spin); | |
| getAndDelOption(arguments, "--threads", threads); | |
| getAndDelOption(arguments, "--useCudaGraph", graph); | |
| getAndDelOption(arguments, "--separateProfileRun", rerun); | |
| getAndDelOption(arguments, "--timeDeserialize", timeDeserialize); | |
| getAndDelOption(arguments, "--timeRefit", timeRefit); | |
| getAndDelOption(arguments, "--persistentCacheRatio", persistentCacheRatio); | |
| std::string list; | |
| getAndDelOption(arguments, "--loadInputs", list); | |
| std::vector<std::string> inputsList{splitToStringVec(list, ',')}; | |
| splitInsertKeyValue(inputsList, inputs); | |
| getShapesInference(arguments, shapes, "--shapes"); | |
| setOptProfile = getAndDelOption(arguments, "--useProfile", optProfileIndex); | |
| std::string allocationStrategyString; | |
| getAndDelOption(arguments, "--allocationStrategy", allocationStrategyString); | |
| if (allocationStrategyString == "static") | |
| { | |
| memoryAllocationStrategy = MemoryAllocationStrategy::kSTATIC; | |
| } | |
| else if (allocationStrategyString == "profile") | |
| { | |
| memoryAllocationStrategy = MemoryAllocationStrategy::kPROFILE; | |
| } | |
| else if (allocationStrategyString == "runtime") | |
| { | |
| memoryAllocationStrategy = MemoryAllocationStrategy::kRUNTIME; | |
| } | |
| else if (!allocationStrategyString.empty()) | |
| { | |
| throw std::invalid_argument(std::string("Unknown allocationStrategy: ") + allocationStrategyString); | |
| } | |
| bool allowWs{false}; | |
| getAndDelOption(arguments, "--allowWeightStreaming", allowWs); | |
| bool wsBudgetFound = getAndDelOption(arguments, "--weightStreamingBudget", weightStreamingBudget); | |
| if (wsBudgetFound && !allowWs) | |
| { | |
| throw std::invalid_argument( | |
| "The weight streaming budget can only be set with --allowWeightStreaming specified."); | |
| } | |
| if (allowWs && weightStreamingBudget.isDisabled()) | |
| { | |
| sample::gLogWarning << "The engine can stream its weights but it will not at runtime because " | |
| "--weightStreamingBudget unset or set to " | |
| << WeightStreamingBudget::kDISABLE << "." << std::endl; | |
| } | |
| std::string debugTensorList; | |
| getAndDelOption(arguments, "--saveDebugTensors", debugTensorList); | |
| std::vector<std::string> fileNames{splitToStringVec(debugTensorList, ',')}; | |
| splitInsertKeyValue(fileNames, debugTensorFileNames); | |
| std::string debugFormats; | |
| getAndDelOption(arguments, "--saveAllDebugTensors", debugFormats); | |
| dumpAlldebugTensorFormats = splitToStringVec(debugFormats, ','); | |
| } | |
| void ReportingOptions::parse(Arguments& arguments) | |
| { | |
| getAndDelOption(arguments, "--avgRuns", avgs); | |
| getAndDelOption(arguments, "--verbose", verbose); | |
| getAndDelOption(arguments, "--dumpRefit", refit); | |
| getAndDelOption(arguments, "--dumpOutput", output); | |
| getAndDelOption(arguments, "--dumpRawBindingsToFile", dumpRawBindings); | |
| getAndDelOption(arguments, "--dumpProfile", profile); | |
| getAndDelOption(arguments, "--dumpLayerInfo", layerInfo); | |
| getAndDelOption(arguments, "--dumpOptimizationProfile", optProfileInfo); | |
| getAndDelOption(arguments, "--exportTimes", exportTimes); | |
| getAndDelOption(arguments, "--exportOutput", exportOutput); | |
| getAndDelOption(arguments, "--exportProfile", exportProfile); | |
| getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo); | |
| std::string percentileString; | |
| getAndDelOption(arguments, "--percentile", percentileString); | |
| std::vector<std::string> percentileStrings = splitToStringVec(percentileString, ','); | |
| if (!percentileStrings.empty()) | |
| { | |
| percentiles.clear(); | |
| } | |
| for (const auto& p : percentileStrings) | |
| { | |
| percentiles.push_back(stringToValue<float>(p)); | |
| } | |
| for (auto percentile : percentiles) | |
| { | |
| if (percentile < 0.F || percentile > 100.F) | |
| { | |
| throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + "is not in [0,100]"); | |
| } | |
| } | |
| } | |
| bool parseHelp(Arguments& arguments) | |
| { | |
| bool helpLong{false}; | |
| bool helpShort{false}; | |
| getAndDelOption(arguments, "--help", helpLong); | |
| getAndDelOption(arguments, "-h", helpShort); | |
| return helpLong || helpShort; | |
| } | |
| void AllOptions::parse(Arguments& arguments) | |
| { | |
| model.parse(arguments); | |
| build.parse(arguments); | |
| system.parse(arguments); | |
| inference.parse(arguments); | |
| if (build.useRuntime != RuntimeMode::kFULL && inference.timeRefit) | |
| { | |
| throw std::invalid_argument("--timeRefit requires --useRuntime=full."); | |
| } | |
| if (inference.optProfileIndex < static_cast<int32_t>(build.optProfiles.size())) | |
| { | |
| // Propagate shape profile between builder and inference | |
| for (auto const& s : build.optProfiles[inference.optProfileIndex]) | |
| { | |
| if (inference.shapes.find(s.first) == inference.shapes.end()) | |
| { | |
| insertShapesInference( | |
| inference.shapes, s.first, s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]); | |
| } | |
| } | |
| for (auto const& s : inference.shapes) | |
| { | |
| if (build.optProfiles[inference.optProfileIndex].find(s.first) | |
| == build.optProfiles[inference.optProfileIndex].end()) | |
| { | |
| // assume min/opt/max all the same | |
| insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMIN, | |
| s.first, s.second); | |
| insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kOPT, | |
| s.first, s.second); | |
| insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMAX, | |
| s.first, s.second); | |
| } | |
| } | |
| } | |
| // Set nvtxVerbosity to be the same as build-time profilingVerbosity. | |
| inference.nvtxVerbosity = build.profilingVerbosity; | |
| reporting.parse(arguments); | |
| helps = parseHelp(arguments); | |
| if (!helps) | |
| { | |
| if (!build.load && model.baseModel.format == ModelFormat::kANY) | |
| { | |
| throw std::invalid_argument("Model missing or format not recognized"); | |
| } | |
| if (system.DLACore >= 0 && inference.graph) | |
| { | |
| sample::gLogWarning << "CUDA graphs and DLA offloading are not simultaneously supported. " | |
| << "The CUDA graph option has been disabled (alternatively, you may run without the " | |
| "`--useDLACore` option)." | |
| << std::endl; | |
| inference.graph = false; | |
| } | |
| if (build.safe && system.DLACore >= 0) | |
| { | |
| build.buildDLAStandalone = true; | |
| } | |
| if (build.runtimePlatform != nvinfer1::RuntimePlatform::kSAME_AS_BUILD) | |
| { | |
| build.skipInference = true; | |
| } | |
| if (build.buildDLAStandalone) | |
| { | |
| build.skipInference = true; | |
| auto checkSafeDLAFormats = [](std::vector<IOFormat> const& fmt, bool isInput) | |
| { | |
| return fmt.empty() | |
| ? false | |
| : std::all_of(fmt.begin(), fmt.end(), | |
| [&](IOFormat const& pair) | |
| { | |
| bool supported{false}; | |
| bool const isDLA_LINEAR{ | |
| pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kDLA_LINEAR)}; | |
| bool const isHWC4{pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kCHW4) | |
| || pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kDLA_HWC4)}; | |
| bool const isCHW32{ | |
| pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kCHW32)}; | |
| bool const isCHW16{ | |
| pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kCHW16)}; | |
| supported |= pair.first == nvinfer1::DataType::kINT8 | |
| && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW32); | |
| supported |= pair.first == nvinfer1::DataType::kHALF | |
| && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW16); | |
| return supported; | |
| }); | |
| }; | |
| if (!checkSafeDLAFormats(build.inputFormats, true) || !checkSafeDLAFormats(build.outputFormats, false)) | |
| { | |
| throw std::invalid_argument( | |
| "I/O formats for safe DLA capability are restricted to fp16/int8:dla_linear, fp16/int8:hwc4, " | |
| "fp16:chw16 or " | |
| "int8:chw32"); | |
| } | |
| if (build.allowGPUFallback) | |
| { | |
| throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for DLA standalone mode"); | |
| } | |
| } | |
| if (system.DLACore < 0 && build.enableUInt8AsymmetricQuantizationDLA) | |
| { | |
| throw std::invalid_argument("--uint8AsymmetricQuantizationDLA is not supported without DLA cores."); | |
| } | |
| } | |
| } | |
| void TaskInferenceOptions::parse(Arguments& arguments) | |
| { | |
| getAndDelOption(arguments, "engine", engine); | |
| getAndDelOption(arguments, "device", device); | |
| getAndDelOption(arguments, "batch", batch); | |
| getAndDelOption(arguments, "DLACore", DLACore); | |
| getAndDelOption(arguments, "graph", graph); | |
| getAndDelOption(arguments, "persistentCacheRatio", persistentCacheRatio); | |
| } | |
| void SafeBuilderOptions::parse(Arguments& arguments) | |
| { | |
| auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector, const char* argument) { | |
| std::string list; | |
| getAndDelOption(arguments, argument, list); | |
| std::vector<std::string> formats{splitToStringVec(list, ',')}; | |
| for (const auto& f : formats) | |
| { | |
| formatsVector.push_back(stringToValue<IOFormat>(f)); | |
| } | |
| }; | |
| getAndDelOption(arguments, "--serialized", serialized); | |
| getAndDelOption(arguments, "--onnx", onnxModelFile); | |
| getAndDelOption(arguments, "--help", help); | |
| getAndDelOption(arguments, "-h", help); | |
| getAndDelOption(arguments, "--verbose", verbose); | |
| getAndDelOption(arguments, "-v", verbose); | |
| getFormats(inputFormats, "--inputIOFormats"); | |
| getFormats(outputFormats, "--outputIOFormats"); | |
| getAndDelOption(arguments, "--int8", int8); | |
| getAndDelOption(arguments, "--calib", calibFile); | |
| getAndDelOption(arguments, "--consistency", consistency); | |
| getAndDelOption(arguments, "--std", standard); | |
| std::string pluginName; | |
| while (getAndDelOption(arguments, "--plugins", pluginName)) | |
| { | |
| sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." << std::endl; | |
| plugins.emplace_back(pluginName); | |
| } | |
| while (getAndDelOption(arguments, "--staticPlugins", pluginName)) | |
| { | |
| plugins.emplace_back(pluginName); | |
| } | |
| bool noBuilderCache{false}; | |
| getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); | |
| getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); | |
| getAndDelOption(arguments, "--avgTiming", avgTiming); | |
| if (noBuilderCache) | |
| { | |
| timingCacheMode = TimingCacheMode::kDISABLE; | |
| } | |
| else if (!timingCacheFile.empty()) | |
| { | |
| timingCacheMode = TimingCacheMode::kGLOBAL; | |
| } | |
| else | |
| { | |
| timingCacheMode = TimingCacheMode::kLOCAL; | |
| } | |
| getAndDelOption(arguments, "--sparsity", sparsity); | |
| } | |
| std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) | |
| { | |
| os << "=== Model Options ===" << std::endl; | |
| os << "Format: "; | |
| switch (options.format) | |
| { | |
| case ModelFormat::kONNX: | |
| { | |
| os << "ONNX"; | |
| break; | |
| } | |
| case ModelFormat::kANY: os << "*"; break; | |
| } | |
| os << std::endl << "Model: " << options.model << std::endl; | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const ModelOptions& options) | |
| { | |
| os << options.baseModel; | |
| switch (options.baseModel.format) | |
| { | |
| case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or the generic case | |
| case ModelFormat::kANY: break; | |
| } | |
| os << "Output:"; | |
| for (const auto& o : options.outputs) | |
| { | |
| os << " " << o; | |
| } | |
| os << std::endl; | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) | |
| { | |
| switch (dtype) | |
| { | |
| case nvinfer1::DataType::kFLOAT: | |
| { | |
| os << "fp32"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kHALF: | |
| { | |
| os << "fp16"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kBF16: | |
| { | |
| os << "bf16"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kINT8: | |
| { | |
| os << "int8"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kINT32: | |
| { | |
| os << "int32"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kBOOL: | |
| { | |
| os << "bool"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kUINT8: | |
| { | |
| os << "uint8"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kFP8: | |
| { | |
| os << "fp8"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kINT64: | |
| { | |
| os << "int64"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kINT4: | |
| { | |
| os << "int4"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kFP4: | |
| { | |
| os << "fp4"; | |
| break; | |
| } | |
| case nvinfer1::DataType::kE8M0: | |
| { | |
| os << "e8m0"; | |
| break; | |
| } | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, IOFormat const& format) | |
| { | |
| os << format.first << ":"; | |
| for (int32_t f = 0; f < nvinfer1::EnumMax<nvinfer1::TensorFormat>(); ++f) | |
| { | |
| if ((1U << f) & format.second) | |
| { | |
| if (f) | |
| { | |
| os << "+"; | |
| } | |
| switch (nvinfer1::TensorFormat(f)) | |
| { | |
| case nvinfer1::TensorFormat::kLINEAR: | |
| { | |
| os << "chw"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kCHW2: | |
| { | |
| os << "chw2"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kHWC8: | |
| { | |
| os << "hwc8"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kHWC16: | |
| { | |
| os << "hwc16"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kCHW4: | |
| { | |
| os << "chw4"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kCHW16: | |
| { | |
| os << "chw16"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kCHW32: | |
| { | |
| os << "chw32"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kDHWC8: | |
| { | |
| os << "dhwc8"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kCDHW32: | |
| { | |
| os << "cdhw32"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kHWC: | |
| { | |
| os << "hwc"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kDHWC: | |
| { | |
| os << "dhwc"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kDLA_LINEAR: | |
| { | |
| os << "dla_linear"; | |
| break; | |
| } | |
| case nvinfer1::TensorFormat::kDLA_HWC4: | |
| { | |
| os << "dla_hwc4"; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType) | |
| { | |
| switch (devType) | |
| { | |
| case nvinfer1::DeviceType::kGPU: | |
| { | |
| os << "GPU"; | |
| break; | |
| } | |
| case nvinfer1::DeviceType::kDLA: | |
| { | |
| os << "DLA"; | |
| break; | |
| } | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform) | |
| { | |
| switch (platform) | |
| { | |
| case nvinfer1::RuntimePlatform::kSAME_AS_BUILD: | |
| { | |
| os << "Same As Build"; | |
| break; | |
| } | |
| case nvinfer1::RuntimePlatform::kWINDOWS_AMD64: | |
| { | |
| os << "Windows AMD64"; | |
| break; | |
| } | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) | |
| { | |
| int32_t i = 0; | |
| for (const auto& d : dims) | |
| { | |
| if (!d.size()) | |
| { | |
| break; | |
| } | |
| os << (i ? "+" : "") << d; | |
| ++i; | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, LayerPrecisions const& layerPrecisions) | |
| { | |
| int32_t i = 0; | |
| for (auto const& layerPrecision : layerPrecisions) | |
| { | |
| os << (i ? "," : "") << layerPrecision.first << ":" << layerPrecision.second; | |
| ++i; | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, LayerDeviceTypes const& layerDeviceTypes) | |
| { | |
| int32_t i = 0; | |
| for (auto const& layerDevicePair : layerDeviceTypes) | |
| { | |
| os << (i++ ? ", " : "") << layerDevicePair.first << ":" << layerDevicePair.second; | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, StringSet const& stringSet) | |
| { | |
| int64_t i = 0; | |
| for (auto const& s : stringSet) | |
| { | |
| os << (i ? "," : "") << s; | |
| ++i; | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const BuildOptions& options) | |
| { | |
| // if loadEngine is specified, BuildOptions are N/A | |
| if (options.load) | |
| { | |
| os << std::endl; | |
| return os; | |
| } | |
| // clang-format off | |
| os << "=== Build Options ===" << std::endl << | |
| "Memory Pools: "; printMemoryPools(os, options) << std::endl << | |
| "avgTiming: " << options.avgTiming << std::endl << | |
| "Precision: "; printPrecision(os, options) << std::endl << | |
| "LayerPrecisions: " << options.layerPrecisions << std::endl << | |
| "Layer Device Types: " << options.layerDeviceTypes << std::endl << | |
| "Calibration: " << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl << | |
| "Refit: " << boolToEnabled(options.refittable) << std::endl << | |
| "Strip weights: " << boolToEnabled(options.stripWeights) << std::endl << | |
| "Version Compatible: " << boolToEnabled(options.versionCompatible) << std::endl << | |
| "ONNX Plugin InstanceNorm: " << boolToEnabled(options.pluginInstanceNorm) << std::endl << | |
| "ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: " << boolToEnabled(options.enableUInt8AsymmetricQuantizationDLA) << std::endl << | |
| "TensorRT runtime: " << options.useRuntime << std::endl << | |
| "Lean DLL Path: " << options.leanDLLPath << std::endl << | |
| "Tempfile Controls: "; printTempfileControls(os, options.tempfileControls) << std::endl << | |
| "Exclude Lean Runtime: " << boolToEnabled(options.excludeLeanRuntime) << std::endl << | |
| "Sparsity: "; printSparsity(os, options) << std::endl << | |
| "Safe mode: " << boolToEnabled(options.safe) << std::endl << | |
| "Build DLA standalone loadable: " << boolToEnabled(options.buildDLAStandalone) << std::endl << | |
| "Allow GPU fallback for DLA: " << boolToEnabled(options.allowGPUFallback) << std::endl << | |
| "DirectIO mode: " << boolToEnabled(options.directIO) << std::endl << | |
| "Restricted mode: " << boolToEnabled(options.restricted) << std::endl << | |
| "Skip inference: " << boolToEnabled(options.skipInference) << std::endl << | |
| "Save engine: " << (options.save ? options.engine : "") << std::endl << | |
| "Load engine: " << (options.load ? options.engine : "") << std::endl << | |
| "Profiling verbosity: " << static_cast<int32_t>(options.profilingVerbosity) << std::endl << | |
| "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl << | |
| "timingCacheMode: "; printTimingCache(os, options.timingCacheMode) << std::endl << | |
| "timingCacheFile: " << options.timingCacheFile << std::endl << | |
| "Enable Compilation Cache: "<< boolToEnabled(!options.disableCompilationCache) << std::endl << | |
| "Enable Monitor Memory: "<< boolToEnabled(options.enableMonitorMemory) << std::endl << | |
| "errorOnTimingCacheMiss: " << boolToEnabled(options.errorOnTimingCacheMiss) << std::endl << | |
| "Preview Features: "; printPreviewFlags(os, options) << std::endl << | |
| "MaxAuxStreams: " << options.maxAuxStreams << std::endl << | |
| "BuilderOptimizationLevel: " << options.builderOptimizationLevel << std::endl << | |
| "MaxTactics: " << options.maxTactics << std::endl << | |
| "Calibration Profile Index: " << options.calibProfile << std::endl << | |
| "Weight Streaming: " << boolToEnabled(options.allowWeightStreaming) << std::endl << | |
| "Runtime Platform: " << options.runtimePlatform << std::endl << | |
| "Debug Tensors: " << options.debugTensors << std::endl << | |
| "Distributive Independence: " << boolToEnabled(options.distributiveIndependence) << std::endl << | |
| "Mark Unfused Tensors As Debug Tensors: " << boolToEnabled(options.markUnfusedTensorsAsDebugTensors) << std::endl; | |
| // clang-format on | |
| auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector<IOFormat> formats) { | |
| if (formats.empty()) | |
| { | |
| os << direction << "s format: fp32:CHW" << std::endl; | |
| } | |
| else | |
| { | |
| for (const auto& f : formats) | |
| { | |
| os << direction << ": " << f << std::endl; | |
| } | |
| } | |
| }; | |
| printIOFormats(os, "Input(s)", options.inputFormats); | |
| printIOFormats(os, "Output(s)", options.outputFormats); | |
| for (size_t i = 0; i < options.optProfiles.size(); i++) | |
| { | |
| printShapes(os, "build", options.optProfiles[i], i); | |
| } | |
| printShapes(os, "calibration", options.shapesCalib, -1); | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const SystemOptions& options) | |
| { | |
| // clang-format off | |
| os << "=== System Options ===" << std::endl << | |
| "Device: " << options.device << std::endl << | |
| "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << std::endl; | |
| os << "Plugins:"; | |
| for (const auto& p : options.plugins) | |
| { | |
| os << " " << p; | |
| } | |
| os << std::endl; | |
| os << "setPluginsToSerialize:"; | |
| for (const auto& p : options.setPluginsToSerialize) | |
| { | |
| os << " " << p; | |
| } | |
| os << std::endl; | |
| os << "dynamicPlugins:"; | |
| for (const auto& p : options.dynamicPlugins) | |
| { | |
| os << " " << p; | |
| } | |
| os << std::endl; | |
| os << "ignoreParsedPluginLibs: " << options.ignoreParsedPluginLibs << std::endl; | |
| os << std::endl; | |
| return os; | |
| // clang-format on | |
| } | |
| std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) | |
| { | |
| // clang-format off | |
| os << "=== Inference Options ===" << std::endl << | |
| "Batch: "; | |
| if (options.batch && options.shapes.empty()) | |
| { | |
| os << options.batch << std::endl; | |
| } | |
| else | |
| { | |
| os << "Explicit" << std::endl; | |
| } | |
| printShapes(os, "inference", options.shapes, options.optProfileIndex); | |
| std::string wsBudget{"Disabled"}; | |
| if (options.weightStreamingBudget.bytes == WeightStreamingBudget::kAUTOMATIC) | |
| { | |
| wsBudget = "Automatic"; | |
| } | |
| else if (options.weightStreamingBudget.bytes != WeightStreamingBudget::kDISABLE) | |
| { | |
| wsBudget = std::to_string(options.weightStreamingBudget.bytes) + " bytes"; | |
| } | |
| else if (options.weightStreamingBudget.percent != WeightStreamingBudget::kDISABLE) | |
| { | |
| wsBudget = std::to_string(options.weightStreamingBudget.percent) + "%"; | |
| } | |
| os << "Iterations: " << options.iterations << std::endl << | |
| "Duration: " << options.duration << "s (+ " | |
| << options.warmup << "ms warm up)" << std::endl << | |
| "Sleep time: " << options.sleep << "ms" << std::endl << | |
| "Idle time: " << options.idle << "ms" << std::endl << | |
| "Inference Streams: " << options.infStreams << std::endl << | |
| "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << | |
| "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl << | |
| "Spin-wait: " << boolToEnabled(options.spin) << std::endl << | |
| "Multithreading: " << boolToEnabled(options.threads) << std::endl << | |
| "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << | |
| "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << | |
| "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << | |
| "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << | |
| "NVTX verbosity: " << static_cast<int32_t>(options.nvtxVerbosity) << std::endl << | |
| "Persistent Cache Ratio: " << static_cast<float>(options.persistentCacheRatio) << std::endl << | |
| "Optimization Profile Index: "<< options.optProfileIndex << std::endl << | |
| "Weight Streaming Budget: " << wsBudget << std::endl; | |
| // clang-format on | |
| os << "Inputs:" << std::endl; | |
| for (const auto& input : options.inputs) | |
| { | |
| os << input.first << "<-" << input.second << std::endl; | |
| } | |
| os << "Debug Tensor Save Destinations:" << std::endl; | |
| for (auto const& fileName : options.debugTensorFileNames) | |
| { | |
| os << fileName.first << ": " << fileName.second << std::endl; | |
| } | |
| os << "Dump All Debug Tensor in Formats: " << std::endl; | |
| for (auto const& format : options.dumpAlldebugTensorFormats) | |
| { | |
| os << format << std::endl; | |
| } | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) | |
| { | |
| // clang-format off | |
| os << "=== Reporting Options ===" << std::endl << | |
| "Verbose: " << boolToEnabled(options.verbose) << std::endl << | |
| "Averages: " << options.avgs << " inferences" << std::endl << | |
| "Percentiles: " << joinValuesToString(options.percentiles, ",") << std::endl << | |
| "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << | |
| "Dump output: " << boolToEnabled(options.output) << std::endl << | |
| "Profile: " << boolToEnabled(options.profile) << std::endl << | |
| "Export timing to JSON file: " << options.exportTimes << std::endl << | |
| "Export output to JSON file: " << options.exportOutput << std::endl << | |
| "Export profile to JSON file: " << options.exportProfile << std::endl; | |
| // clang-format on | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const AllOptions& options) | |
| { | |
| os << options.model << options.build << options.system << options.inference << options.reporting << std::endl; | |
| return os; | |
| } | |
| std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) | |
| { | |
| auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector<IOFormat> formats) | |
| { | |
| if (formats.empty()) | |
| { | |
| os << direction << "s format: fp32:CHW" << std::endl; | |
| } | |
| else | |
| { | |
| for (const auto& f : formats) | |
| { | |
| os << direction << ": " << f << std::endl; | |
| } | |
| } | |
| }; | |
| os << "=== Build Options ===" << std::endl; | |
| os << "Model ONNX: " << options.onnxModelFile << std::endl; | |
| os << "Precision: FP16"; | |
| if (options.int8) | |
| { | |
| os << " + INT8"; | |
| } | |
| if (options.fp8) | |
| { | |
| os << " + FP8"; | |
| } | |
| if (options.int4) | |
| { | |
| os << " + INT4"; | |
| } | |
| os << std::endl; | |
| os << "Calibration file: " << options.calibFile << std::endl; | |
| os << "Serialized Network: " << options.serialized << std::endl; | |
| printIOFormats(os, "Input(s)", options.inputFormats); | |
| printIOFormats(os, "Output(s)", options.outputFormats); | |
| os << "Plugins:"; | |
| for (const auto& p : options.plugins) | |
| { | |
| os << " " << p; | |
| } | |
| os << "timingCacheMode: "; | |
| printTimingCache(os, options.timingCacheMode) << std::endl; | |
| os << "timingCacheFile: " << options.timingCacheFile << std::endl; | |
| os << std::endl; | |
| return os; | |
| } | |
| void BaseModelOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << " --onnx=<file> ONNX model" << std::endl; | |
| // clang-format on | |
| } | |
| void ModelOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== Model Options ===" << std::endl; | |
| BaseModelOptions::help(os); | |
| // clang-format on | |
| } | |
| void BuildOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== Build Options ===" "\n" | |
| " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" | |
| " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" "\n" | |
| " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" | |
| " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" | |
| " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" | |
| " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" | |
| " Note: All three of min, opt and max shapes must be supplied." "\n" | |
| " However, if only opt shapes is supplied then it will be expanded so" "\n" | |
| " that min shapes and max shapes are set to the same values as opt shapes." "\n" | |
| " Input names can be wrapped with escaped single quotes (ex: 'Input:0')." "\n" | |
| " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" | |
| " For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon." "\n" | |
| " Each input shape is supplied as a key-value pair where key is the input name and" "\n" | |
| " value is the dimensions (including the batch dimension) to be used for that input." "\n" | |
| " Each key-value pair has the key and value separated using a colon (:)." "\n" | |
| " Multiple input shapes can be provided via comma-separated key-value pairs, and each input name can" "\n" | |
| " contain at most one wildcard ('*') character." "\n" | |
| " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" | |
| " See --outputIOFormats help for the grammar of type and format list." "\n" | |
| " Note: If this option is specified, please set comma-separated types and formats for all" "\n" | |
| " inputs following the same order as network inputs ID (even if only one input" "\n" | |
| " needs specifying IO format) or set the type and format once for broadcasting." "\n" | |
| " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" | |
| " Note: If this option is specified, please set comma-separated types and formats for all" "\n" | |
| " outputs following the same order as network outputs ID (even if only one output" "\n" | |
| " needs specifying IO format) or set the type and format once for broadcasting." "\n" | |
| R"( IO Formats: spec ::= IOfmt[","spec])" "\n" | |
| " IOfmt ::= type:fmt" "\n" | |
| R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int64"|"int8"|"uint8"|"bool")" "\n" | |
| R"( fmt ::= ("chw"|"chw2"|"hwc8"|"chw4"|"chw16"|"chw32"|"dhwc8"|)" "\n" | |
| R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4"|"hwc16"|"dhwc")["+"fmt])" "\n" | |
| " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s)" "\n" | |
| " Supports the following base-2 suffixes: " << getAvailableUnitSuffixes() << "." "\n" | |
| " If none of suffixes is appended, the defualt unit is in MiB." "\n" | |
| " Note: Also accepts decimal sizes, e.g. 0.25M. Will be rounded down to the nearest integer bytes." "\n" | |
| " In particular, for dlaSRAM the bytes will be rounded down to the nearest power of 2." "\n" | |
| R"( Pool constraint: poolspec ::= poolfmt[","poolspec])" "\n" | |
| " poolfmt ::= pool:size\n" | |
| R"( pool ::= "workspace"|"dlaSRAM"|"dlaLocalDRAM"|"dlaGlobalDRAM"|"tacticSharedMem")" "\n" | |
| " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)." "\n" | |
| " Please only assign once." "\n" | |
| " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " | |
| << defaultAvgTiming << ")" "\n" | |
| " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" | |
| " and weights within the engine." "\n" | |
| " --stripWeights Strip weights from plan. This flag works with either refit or refit with identical weights. Default""\n" | |
| " to latter, but you can switch to the former by enabling both --stripWeights and --refit at the same""\n" | |
| " time." "\n" | |
| " --stripAllWeights Alias for combining the --refit and --stripWeights options. It marks all weights as refittable," "\n" | |
| " disregarding any performance impact. Additionally, it strips all refittable weights after the " "\n" | |
| " engine is built." "\n" | |
| " --weightless [Deprecated] this knob has been deprecated. Please use --stripWeights" "\n" | |
| " --versionCompatible, --vc Mark the engine as version compatible. This allows the engine to be used with newer versions" "\n" | |
| " of TensorRT on the same host OS, as well as TensorRT's dispatch and lean runtimes." "\n" | |
| " --pluginInstanceNorm, --pi Set `kNATIVE_INSTANCENORM` to false in the ONNX parser. This will cause the ONNX parser to use" "\n" | |
| " a plugin InstanceNorm implementation over the native implementation when parsing." "\n" | |
| " --uint8AsymmetricQuantizationDLA Set `kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA` to true in the ONNX parser. This directs the" "\n" | |
| " onnx parser to allow UINT8 as a quantization data type and import zero point values directly" "\n" | |
| " without converting to float type or all-zero values. Should only be set with DLA software version" "\n" | |
| " >= 3.16." "\n" | |
| R"( --useRuntime=runtime TensorRT runtime to execute engine. "lean" and "dispatch" require loading VC engine and do)" "\n" | |
| " not support building an engine." "\n" | |
| R"( runtime::= "full"|"lean"|"dispatch")" "\n" | |
| " --leanDLLPath=<file> External lean runtime DLL to use in version compatiable mode." "\n" | |
| " --excludeLeanRuntime When --versionCompatible is enabled, this flag indicates that the generated engine should" "\n" | |
| " not include an embedded lean runtime. If this is set, the user must explicitly specify a" "\n" | |
| " valid lean runtime to use when loading the engine." "\n" | |
| " --monitorMemory Enable memory monitor report for debugging usage. (default = disabled)" "\n" | |
| " --sparsity=spec Control sparsity (default = disabled). " "\n" | |
| R"( Sparsity: spec ::= "disable", "enable", "force")" "\n" | |
| " Note: Description about each of these options is as below" "\n" | |
| " disable = do not enable sparse tactics in the builder (this is the default)" "\n" | |
| " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" | |
| " considered if the weights have the right sparsity pattern)" "\n" | |
| " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" | |
| " a sparsity pattern (even if you loaded a model yourself)" "\n" | |
| " [Deprecated] this knob has been deprecated." "\n" | |
| " Please use <polygraphy surgeon prune> to rewrite the weights." "\n" | |
| " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" | |
| " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" | |
| " --bf16 Enable bf16 precision, in addition to fp32 (default = disabled)" "\n" | |
| " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" | |
| " --fp8 Enable fp8 precision, in addition to fp32 (default = disabled)" "\n" | |
| " --int4 Enable int4 precision, in addition to fp32 (default = disabled)" "\n" | |
| " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" | |
| " --stronglyTyped Create a strongly typed network. (default = disabled)" "\n" | |
| " --directIO [Deprecated] Avoid reformatting at network boundaries. (default = disabled)" "\n" | |
| " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" | |
| R"( Precision Constraints: spec ::= "none" | "obey" | "prefer")" "\n" | |
| " none = no constraints" "\n" | |
| " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" | |
| " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" | |
| " otherwise" "\n" | |
| " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" | |
| R"( "obey" or "prefer". (default = none))" "\n" | |
| R"( The specs are read left-to-right, and later ones override earlier ones. Each layer name can)" "\n" | |
| " contain at most one wildcard ('*') character." "\n" | |
| R"( Per-layer precision spec ::= layerPrecision[","spec])" "\n" | |
| R"( layerPrecision ::= layerName":"precision)" "\n" | |
| R"( precision ::= "fp32"|"fp16"|"bf16"|"int32"|"int8")" "\n" | |
| " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" | |
| R"( "obey" or "prefer". (default = none)" "\n" | |
| R"( The specs are read left-to-right, and later ones override earlier ones. Each layer name can)" "\n" | |
| " contain at most one wildcard ('*') character. If a layer has more than" "\n" | |
| R"( one output, then multiple types separated by "+" can be provided for this layer.)" "\n" | |
| R"( Per-layer output type spec ::= layerOutputTypes[","spec])" "\n" | |
| R"( layerOutputTypes ::= layerName":"type)" "\n" | |
| R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int8"["+"type])" "\n" | |
| " --layerDeviceTypes=spec Specify layer-specific device type." "\n" | |
| " The specs are read left-to-right, and later ones override earlier ones. If a layer does not have" "\n" | |
| " a device type specified, the layer will opt for the default device type." "\n" | |
| R"( Per-layer device type spec ::= layerDeviceTypePair[","spec])" "\n" | |
| R"( layerDeviceTypePair ::= layerName":"deviceType)" "\n" | |
| R"( deviceType ::= "GPU"|"DLA")" "\n" | |
| " --calib=<file> Read INT8 calibration cache file" "\n" | |
| " --safe Enable build safety certified engine, if DLA is enable, --buildDLAStandalone will be specified" "\n" | |
| " automatically (default = disabled)" "\n" | |
| " --buildDLAStandalone Enable build DLA standalone loadable which can be loaded by cuDLA, when this option is enabled, " "\n" | |
| " --allowGPUFallback is disallowed and --skipInference is enabled by default. Additionally, " "\n" | |
| " specifying --inputIOFormats and --outputIOFormats restricts I/O data type and memory layout" "\n" | |
| " (default = disabled)" "\n" | |
| " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers (default = disabled)" "\n" | |
| " --consistency Perform consistency checking on safety certified engine" "\n" | |
| " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" | |
| " --saveEngine=<file> Save the serialized engine" "\n" | |
| " --loadEngine=<file> Load a serialized engine" "\n" | |
| " --asyncFileReader Load a serialized engine using async stream reader. Should be combined with --loadEngine." "\n" | |
| " --getPlanVersionOnly Print TensorRT version when loaded plan was created. Works without deserialization of the plan." "\n" | |
| " Use together with --loadEngine. Supported only for engines created with 8.6 and forward." "\n" | |
| " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" | |
| " tactic sources (default = all available tactics)." "\n" | |
| " Note: Currently only cuDNN, cuBLAS, cuBLAS-LT, and edge mask convolutions are listed as optional" "\n" | |
| " tactics." "\n" | |
| R"( Tactic Sources: tactics ::= tactic[","tactics])" "\n" | |
| " tactic ::= (+|-)lib" "\n" | |
| R"( lib ::= "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS")" "\n" | |
| R"( |"JIT_CONVOLUTIONS")" "\n" | |
| " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" | |
| " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" | |
| " --noCompilationCache Disable Compilation cache in builder, and the cache is part of timing cache (default is to enable compilation cache)" "\n" | |
| " --errorOnTimingCacheMiss Emit error when a tactic being timed is not present in the timing cache (default = false)" "\n" | |
| " --timingCacheFile=<file> Save/load the serialized global timing cache" "\n" | |
| " --preview=features Specify preview feature to be used by adding (+) or removing (-) preview features from the default" "\n" | |
| R"( Preview Features: features ::= feature[","features])" "\n" | |
| " feature ::= (+|-)flag" "\n" | |
| R"( flag ::= "aliasedPluginIO1003")" "\n" | |
| R"( |"runtimeActivationResize")" "\n" | |
| R"( |"profileSharing0806")" "\n" | |
| " --builderOptimizationLevel Set the builder optimization level. (default is 3)" "\n" | |
| " A Higher level allows TensorRT to spend more time searching for better optimization strategy." "\n" | |
| " Valid values include integers from 0 to the maximum optimization level, which is currently 5." "\n" | |
| " --maxTactics Set the maximum number of tactics to time when there is a choice of tactics. (default is -1)" "\n" | |
| " Larger number of tactics allow TensorRT to spend more building time on evaluating tactics." "\n" | |
| " Default value -1 means TensorRT can decide the number of tactics based on its own heuristic." "\n" | |
| " --hardwareCompatibilityLevel=mode Make the engine file compatible with other GPU architectures. (default = none)" "\n" | |
| R"( Hardware Compatibility Level: mode ::= "none" | "ampere+" | "sameComputeCapability")" "\n" | |
| " none = no compatibility" "\n" | |
| " ampere+ = compatible with Ampere and newer GPUs" "\n" | |
| " sameComputeCapability = compatible with GPUs that have the same Compute Capability version" "\n" | |
| " --runtimePlatform=platform Set the target platform for runtime execution. (default = SameAsBuild)" "\n" | |
| " When this option is enabled, --skipInference is enabled by default." "\n" | |
| R"( RuntimePlatfrom: platform ::= "SameAsBuild" | "WindowsAMD64")" "\n" | |
| " SameAsBuild = no requirement for cross-platform compatibility." "\n" | |
| " WindowsAMD64 = set the target platform for engine execution as Windows AMD64 system" "\n" | |
| " --tempdir=<dir> Overrides the default temporary directory TensorRT will use when creating temporary files." "\n" | |
| " See IRuntime::setTemporaryDirectory API documentation for more information." "\n" | |
| " --tempfileControls=controls Controls what TensorRT is allowed to use when creating temporary executable files." "\n" | |
| " Should be a comma-separated list with entries in the format (in_memory|temporary):(allow|deny)." "\n" | |
| " in_memory: Controls whether TensorRT is allowed to create temporary in-memory executable files." "\n" | |
| " temporary: Controls whether TensorRT is allowed to create temporary executable files in the" "\n" | |
| " filesystem (in the directory given by --tempdir)." "\n" | |
| " For example, to allow in-memory files and disallow temporary files:" "\n" | |
| " --tempfileControls=in_memory:allow,temporary:deny" "\n" | |
| R"( If a flag is unspecified, the default behavior is "allow".)" "\n" | |
| " --maxAuxStreams=N Set maximum number of auxiliary streams per inference stream that TRT is allowed to use to run " "\n" | |
| " kernels in parallel if the network contains ops that can run in parallel, with the cost of more " "\n" | |
| " memory usage. Set this to 0 for optimal memory usage. (default = using heuristics)" "\n" | |
| " --profile Build with dynamic shapes using a profile with the min/max/opt shapes provided. Can be specified" "\n" | |
| " multiple times to create multiple profiles with contiguous index." "\n" | |
| " (ex: --profile=0 --minShapes=<spec> --optShapes=<spec> --maxShapes=<spec> --profile=1 ...)" "\n" | |
| " --calibProfile Select the optimization profile to calibrate by index. (default = " | |
| << defaultOptProfileIndex << ")" "\n" | |
| " --allowWeightStreaming Enable a weight streaming engine. Must be specified with --stronglyTyped. TensorRT will disable" "\n" | |
| " weight streaming at runtime unless --weightStreamingBudget is specified." "\n" | |
| " --markDebug Specify list of names of tensors to be marked as debug tensors. Separate names with a comma" "\n" | |
| " --markUnfusedTensorsAsDebugTensors Mark unfused tensors as debug tensors" "\n" | |
| " --tilingOptimizationLevel Set the tiling optimization level. (default is " << defaultTilingOptimizationLevel << ")" "\n" | |
| " A Higher level allows TensorRT to spend more time searching for better optimization strategy." "\n" | |
| " Valid values include integers from " | |
| << static_cast<int32_t>(nvinfer1::TilingOptimizationLevel::kNONE) | |
| << " to the maximum tiling optimization level(" | |
| << static_cast<int32_t>(nvinfer1::TilingOptimizationLevel::kFULL) << ")." "\n" | |
| " --l2LimitForTiling Set the L2 cache usage limit for tiling optimization(default is -1)" "\n" | |
| " --remoteAutoTuningConfig Set the remote auto tuning config. Must be specified with --safe." "\n" | |
| " Format: protocol://username[:password]@hostname[:port]?param1=value1¶m2=value2" "\n" | |
| " Example: ssh://root:root@192.168.1.100:2213?remote_exec_path=/workspace/LWEServer&remote_lib_path=/workspace" "\n" | |
| ; | |
| // clang-format on | |
| os << std::flush; | |
| } | |
| void SystemOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== System Options ===" << std::endl << | |
| " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << | |
| " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << | |
| " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << | |
| " --dynamicPlugins Plugin library (.so) to load dynamically and may be serialized with the engine if they are included in --setPluginsToSerialize (can be specified multiple times)" << std::endl << | |
| " --setPluginsToSerialize Plugin library (.so) to be serialized with the engine (can be specified multiple times)" << std::endl << | |
| " --ignoreParsedPluginLibs By default, when building a version-compatible engine, plugin libraries specified by the ONNX parser " << std::endl << | |
| " are implicitly serialized with the engine (unless --excludeLeanRuntime is specified) and loaded dynamically. " << std::endl << | |
| " Enable this flag to ignore these plugin libraries instead." << std::endl << | |
| " --safetyPlugins Plugin library (.so) for TensorRT auto safety to manually load safety plugins specified by the command line arguments." << std::endl << | |
| " Example: --safetyPlugins=/path/to/plugin_lib.so[pluginNamespace1::plugin1,pluginNamespace2::plugin2]." << std::endl << | |
| " The option can be specified multiple times with different plugin libraries." << std::endl; | |
| // clang-format on | |
| } | |
| void InferenceOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== Inference Options ===" << std::endl << | |
| " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl << | |
| R"( Note: Input names can be wrapped with escaped single quotes (ex: 'Input:0').)" << std::endl << | |
| " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl << | |
| " For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon."<< std::endl << | |
| " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl << | |
| " value is the dimensions (including the batch dimension) to be used for that input." << std::endl << | |
| " Each key-value pair has the key and value separated using a colon (:)." << std::endl << | |
| " Multiple input shapes can be provided via comma-separated key-value pairs, and each input " << std::endl << | |
| " name can contain at most one wildcard ('*') character." << std::endl << | |
| " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be " | |
| "wrapped with single quotes (ex: 'Input:0')" << std::endl << | |
| R"( Input values spec ::= Ival[","spec])" << std::endl << | |
| R"( Ival ::= name":"file)" << std::endl << | |
| " Consult the README for more information on generating files for custom inputs." << std::endl << | |
| " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl << | |
| " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = " | |
| << defaultWarmUp << ")" << std::endl << | |
| " --duration=N Run performance measurements for at least N seconds wallclock time (default = " | |
| << defaultDuration << ")" << std::endl << | |
| " If -1 is specified, inference will keep running unless stopped manually" << std::endl << | |
| " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute " | |
| "(default = " << defaultSleep << ")" << std::endl << | |
| " --idleTime=N Sleep N milliseconds between two continuous iterations" | |
| "(default = " << defaultIdle << ")" << std::endl << | |
| " --infStreams=N Instantiate N execution contexts to run inference concurrently " | |
| "(default = " << defaultStreams << ")" << std::endl << | |
| " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl << | |
| " --noDataTransfers Disable DMA transfers to and from device (default = enabled). Note some device-to-host" << std::endl << | |
| " data transfers will remain if output dumping is enabled via the --dumpOutput or" << std::endl << | |
| " --exportOutput flags." << std::endl << | |
| " --useManagedMemory Use managed memory instead of separate host and device allocations (default = disabled)." << std::endl << | |
| " --useSpinWait Actively synchronize on GPU events. This option may decrease synchronization time but " | |
| "increase CPU usage and power (default = disabled)" << std::endl << | |
| " --threads Enable multithreading to drive engines with independent threads" | |
| " or speed up refitting (default = disabled) " << std::endl << | |
| " --useCudaGraph Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl << | |
| " This flag may be ignored if the graph capture fails." << std::endl << | |
| " --timeDeserialize Time the amount of time it takes to deserialize the network and exit." << std::endl << | |
| " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl << | |
| " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second " | |
| "profile run will be executed (default = disabled)" << std::endl << | |
| " --skipInference Exit after the engine has been built and skip inference perf measurement " | |
| "(default = disabled)" << std::endl << | |
| " --persistentCacheRatio Set the persistentCacheLimit in ratio, 0.5 represent half of max persistent L2 size " | |
| "(default = 0)" << std::endl << | |
| " --useProfile Set the optimization profile for the inference context " | |
| "(default = " << defaultOptProfileIndex << " )." << std::endl << | |
| " --allocationStrategy=spec Specify how the internal device memory for inference is allocated." << std::endl << | |
| R"( Strategy: spec ::= "static"|"profile"|"runtime")" << std::endl << | |
| " static = Allocate device memory based on max size across all profiles." << std::endl << | |
| " profile = Allocate device memory based on max size of the current profile." << std::endl << | |
| " runtime = Allocate device memory based on the actual input shapes." << std::endl << | |
| " --saveDebugTensors Specify list of names of tensors to turn on the debug state" << std::endl << | |
| " and filename to save raw outputs to." << std::endl << | |
| " These tensors must be specified as debug tensors during build time." << std::endl << | |
| R"( Input values spec ::= Ival[","spec])" << std::endl << | |
| R"( Ival ::= name":"file)" << std::endl << | |
| " --saveAllDebugTensors Save all debug tensors to files. " << std::endl << | |
| " Including debug tensors marked by --markDebug and --markUnfusedTensorsAsDebugTensors" << std::endl << | |
| " Multiple file formats can be saved simultaneously." << std::endl << | |
| R"( Input values spec ::= format[","format])" << std::endl << | |
| R"( format ::= "summary"|"numpy"|"string"|"raw")" << std::endl << | |
| " --weightStreamingBudget Set the maximum amount of GPU memory TensorRT is allowed to use for weights." << std::endl << | |
| " It can take on the following values:" << std::endl << | |
| " -2: (default) Disable weight streaming at runtime." << std::endl << | |
| " -1: TensorRT will automatically decide the budget." << std::endl << | |
| " 0-100%: Percentage of streamable weights that reside on the GPU." << std::endl << | |
| " 0% saves the most memory but will have the worst performance." << std::endl << | |
| " Requires the '%' character." << std::endl << | |
| " >=0B: The exact amount of streamable weights that reside on the GPU. Supports the " << std::endl << | |
| " following base-2 suffixes: " << getAvailableUnitSuffixes() << "." << std::endl; | |
| // clang-format on | |
| } | |
| void ReportingOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== Reporting Options ===" << std::endl << | |
| " --verbose Use verbose logging (default = false)" << std::endl << | |
| " --avgRuns=N Report performance measurements averaged over N consecutive " | |
| "iterations (default = " << defaultAvgRuns << ")" << std::endl << | |
| " --percentile=P1,P2,P3,... Report performance for the P1,P2,P3,... percentages (0<=P_i<=100, 0 " | |
| "representing max perf, and 100 representing min perf; (default" | |
| " = " << joinValuesToString(defaultPercentiles, ",") << "%)" << std::endl << | |
| " --dumpRefit Print the refittable layers and weights from a refittable " | |
| "engine" << std::endl << | |
| " --dumpOutput Print the output tensor(s) of the last inference iteration " | |
| "(default = disabled)" << std::endl << | |
| " --dumpRawBindingsToFile Print the input/output tensor(s) of the last inference iteration to file" | |
| "(default = disabled)" << std::endl << | |
| " --dumpProfile Print profile information per layer (default = disabled)" << std::endl << | |
| " --dumpLayerInfo Print layer information of the engine to console " | |
| "(default = disabled)" << std::endl << | |
| " --dumpOptimizationProfile Print the optimization profile(s) information " | |
| "(default = disabled)" << std::endl << | |
| " --exportTimes=<file> Write the timing results in a json file (default = disabled)" << std::endl << | |
| " --exportOutput=<file> Write the output tensors to a json file (default = disabled)" << std::endl << | |
| " --exportProfile=<file> Write the profile information per layer in a json file " | |
| "(default = disabled)" << std::endl << | |
| " --exportLayerInfo=<file> Write the layer information of the engine in a json file " | |
| "(default = disabled)" << std::endl; | |
| // clang-format on | |
| } | |
| void TaskInferenceOptions::help(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== Task Inference Options ===" << std::endl << | |
| " engine=<file> Specify a serialized engine for this task" << std::endl << | |
| " device=N Specify a GPU device for this task" << std::endl << | |
| " DLACore=N Specify a DLACore for this task" << std::endl << | |
| " batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << | |
| " This option should not be used for explicit batch engines" << std::endl << | |
| " graph=1 Use cuda graph for this task" << std::endl << | |
| " persistentCacheRatio=[0-1] Set the persistentCacheLimit ratio for this task (default = 0)" << std::endl; | |
| // clang-format on | |
| } | |
// Writes the "Help" section of the usage text (the --help / -h entry) to `os`.
// Both lines end with std::endl, so the stream is flushed after each one.
void helpHelp(std::ostream& os)
{
    // clang-format off
    os << "=== Help ===" << std::endl;
    os << " --help, -h Print this message" << std::endl;
    // clang-format on
}
| void AllOptions::help(std::ostream& os) | |
| { | |
| ModelOptions::help(os); | |
| os << std::endl; | |
| BuildOptions::help(os); | |
| os << std::endl; | |
| InferenceOptions::help(os); | |
| os << std::endl; | |
| ReportingOptions::help(os); | |
| os << std::endl; | |
| SystemOptions::help(os); | |
| os << std::endl; | |
| helpHelp(os); | |
| } | |
| void SafeBuilderOptions::printHelp(std::ostream& os) | |
| { | |
| // clang-format off | |
| os << "=== Mandatory ===" << std::endl << | |
| " --onnx=<file> ONNX model" << std::endl << | |
| " " << std::endl << | |
| "=== Optional ===" << std::endl << | |
| " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl << | |
| " See --outputIOFormats help for the grammar of type and format list." << std::endl << | |
| " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << | |
| " inputs following the same order as network inputs ID (even if only one input" << std::endl << | |
| " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << | |
| " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl << | |
| " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << | |
| " outputs following the same order as network outputs ID (even if only one output" << std::endl << | |
| " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << | |
| R"( IO Formats: spec ::= IOfmt[","spec])" << std::endl << | |
| " IOfmt ::= type:fmt" << std::endl << | |
| R"( type ::= "fp32"|"fp16"|"int32"|"int8")" << std::endl << | |
| R"( fmt ::= ("chw"|"chw2"|"hwc8"|"chw4"|"chw16"|"chw32"|"dhwc8"|)" << std::endl << | |
| R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4"|"hwc16"|"dhwc")["+"fmt])" << std::endl << | |
| " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl << | |
| " --consistency Perform consistency checking on safety certified engine" << std::endl << | |
| " --std Build standard serialized engine, (default = disabled)" << std::endl << | |
| " --calib=<file> Read INT8 calibration cache file" << std::endl << | |
| " --serialized=<file> Save the serialized network" << std::endl << | |
| " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << | |
| " --verbose or -v Use verbose logging (default = false)" << std::endl << | |
| " --help or -h Print this message" << std::endl << | |
| " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" << std::endl << | |
| " --timingCacheFile=<file> Save/load the serialized global timing cache" << std::endl << | |
| " --sparsity=spec Control sparsity (default = disabled). " << std::endl << | |
| R"( Sparsity: spec ::= "disable", "enable", "force")" << std::endl << | |
| " Note: Description about each of these options is as below" << std::endl << | |
| " disable = do not enable sparse tactics in the builder (this is the default)" << std::endl << | |
| " enable = enable sparse tactics in the builder (but these tactics will only be" << std::endl << | |
| " considered if the weights have the right sparsity pattern)" << std::endl << | |
| " force = enable sparse tactics in the builder and force-overwrite the weights to have" << std::endl << | |
| " a sparsity pattern" << std::endl << | |
| " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " << std::endl << | |
| "" << defaultAvgTiming << ")" << std::endl << | |
| "" << std::endl; | |
| // clang-format on | |
| } | |
| } // namespace sample | |