G0-VLA / g0plus_dockerfile /docker-assets /data /TensorRT-10.13.0.35 /samples /common /sampleUtils.cpp
| /* | |
| * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| * SPDX-License-Identifier: Apache-2.0 | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| using namespace nvinfer1; | |
| namespace sample | |
| { | |
| int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch) | |
| { | |
| int64_t maxNbElems = 1; | |
| for (int32_t i = 0; i < dims.nbDims; ++i) | |
| { | |
| // Get effective length of axis. | |
| int64_t d = dims.d[i]; | |
| // Any dimension is 0, it is an empty tensor. | |
| if (d == 0) | |
| { | |
| return 0; | |
| } | |
| if (i == vecDim) | |
| { | |
| d = samplesCommon::divUp(d, comps); | |
| } | |
| maxNbElems = std::max(maxNbElems, d * strides.d[i]); | |
| } | |
| return maxNbElems * batch * (vecDim < 0 ? 1 : comps); | |
| } | |
| nvinfer1::Dims toDims(std::vector<int64_t> const& vec) | |
| { | |
| int32_t limit = static_cast<int32_t>(nvinfer1::Dims::MAX_DIMS); | |
| if (static_cast<int32_t>(vec.size()) > limit) | |
| { | |
| sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." << std::endl; | |
| } | |
| // Pick first nvinfer1::Dims::MAX_DIMS elements | |
| nvinfer1::Dims dims{std::min(static_cast<int32_t>(vec.size()), limit), {}}; | |
| std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); | |
| return dims; | |
| } | |
| void loadFromFile(std::string const& fileName, char* dst, size_t size) | |
| { | |
| ASSERT(dst); | |
| std::ifstream file(fileName, std::ios::in | std::ios::binary); | |
| if (file.is_open()) | |
| { | |
| file.seekg(0, std::ios::end); | |
| int64_t fileSize = static_cast<int64_t>(file.tellg()); | |
| // Due to change from int32_t to int64_t VC engines created with earlier versions | |
| // may expect input of the half of the size | |
| if (fileSize != static_cast<int64_t>(size) && fileSize != static_cast<int64_t>(size * 2)) | |
| { | |
| std::ostringstream msg; | |
| msg << "Unexpected file size for input file: " << fileName << ". Note: Input binding size is: " << size | |
| << " bytes but the file size is " << fileSize | |
| << " bytes. Double check the size and datatype of the provided data."; | |
| throw std::invalid_argument(msg.str()); | |
| } | |
| // Move file pointer back to the beginning after reading file size. | |
| file.seekg(0, std::ios::beg); | |
| file.read(dst, size); | |
| size_t const nbBytesRead = file.gcount(); | |
| file.close(); | |
| if (nbBytesRead != size) | |
| { | |
| std::ostringstream msg; | |
| msg << "Unexpected file size for input file: " << fileName << ". Note: Expected: " << size | |
| << " bytes but only read: " << nbBytesRead << " bytes"; | |
| throw std::invalid_argument(msg.str()); | |
| } | |
| } | |
| else | |
| { | |
| std::ostringstream msg; | |
| msg << "Cannot open file " << fileName << "!"; | |
| throw std::invalid_argument(msg.str()); | |
| } | |
| } | |
| std::vector<std::string> splitToStringVec(std::string const& s, char separator, int64_t maxSplit) | |
| { | |
| std::vector<std::string> splitted; | |
| for (size_t start = 0; start < s.length();) | |
| { | |
| // If maxSplit is specified and we have reached maxSplit, emplace back the rest of the string and break the | |
| // loop. | |
| if (maxSplit >= 0 && static_cast<int64_t>(splitted.size()) == maxSplit) | |
| { | |
| splitted.emplace_back(s.substr(start, s.length() - start)); | |
| break; | |
| } | |
| size_t separatorIndex = s.find(separator, start); | |
| if (separatorIndex == std::string::npos) | |
| { | |
| separatorIndex = s.length(); | |
| } | |
| splitted.emplace_back(s.substr(start, separatorIndex - start)); | |
| // If the separator is the last character, then we should push an empty string at the end. | |
| if (separatorIndex == s.length() - 1) | |
| { | |
| splitted.emplace_back(""); | |
| } | |
| start = separatorIndex + 1; | |
| } | |
| return splitted; | |
| } | |
| bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput /*= true*/) | |
| { | |
| bool broadcast = formats.size() == 1; | |
| bool validFormatsCount = broadcast || (formats.size() == nbBindings); | |
| if (!formats.empty() && !validFormatsCount) | |
| { | |
| if (isInput) | |
| { | |
| throw std::invalid_argument( | |
| "The number of inputIOFormats must match network's inputs or be one for broadcasting."); | |
| } | |
| throw std::invalid_argument( | |
| "The number of outputIOFormats must match network's outputs or be one for broadcasting."); | |
| } | |
| return broadcast; | |
| } | |
| void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights) | |
| { | |
| using TensorToLayer = std::unordered_map<nvinfer1::ITensor*, nvinfer1::ILayer*>; | |
| using LayerToTensor = std::unordered_map<nvinfer1::ILayer*, nvinfer1::ITensor*>; | |
| // 1. Collect layers and tensors information from the network. | |
| TensorToLayer matmulI2L; | |
| TensorToLayer constO2L; | |
| TensorToLayer shuffleI2L; | |
| LayerToTensor shuffleL2O; | |
| auto collectMappingInfo = [&](int32_t const idx) { | |
| ILayer* l = network.getLayer(idx); | |
| switch (l->getType()) | |
| { | |
| case nvinfer1::LayerType::kMATRIX_MULTIPLY: | |
| { | |
| // assume weights on the second input. | |
| matmulI2L.insert({l->getInput(1), l}); | |
| break; | |
| } | |
| case nvinfer1::LayerType::kCONSTANT: | |
| { | |
| DataType const dtype = static_cast<nvinfer1::IConstantLayer*>(l)->getWeights().type; | |
| if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF) | |
| { | |
| // Sparsify float only. | |
| constO2L.insert({l->getOutput(0), l}); | |
| } | |
| break; | |
| } | |
| case nvinfer1::LayerType::kSHUFFLE: | |
| { | |
| shuffleI2L.insert({l->getInput(0), l}); | |
| shuffleL2O.insert({l, l->getOutput(0)}); | |
| break; | |
| } | |
| default: break; | |
| } | |
| }; | |
| int32_t const nbLayers = network.getNbLayers(); | |
| for (int32_t i = 0; i < nbLayers; ++i) | |
| { | |
| collectMappingInfo(i); | |
| } | |
| if (matmulI2L.size() == 0 || constO2L.size() == 0) | |
| { | |
| // No MatrixMultiply or Constant layer found, no weights to sparsify. | |
| return; | |
| } | |
| // Helper for analysis | |
| auto isTranspose | |
| = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; | |
| auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; | |
| auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool { | |
| for (int32_t i = 0; i < dims.nbDims; ++i) | |
| { | |
| if (dims.d[i] != i || dims.d[i] != -1) | |
| { | |
| return false; | |
| } | |
| } | |
| return true; | |
| }; | |
| auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* { | |
| while (shuffleI2L.find(t) != shuffleI2L.end()) | |
| { | |
| nvinfer1::IShuffleLayer* s = static_cast<nvinfer1::IShuffleLayer*>(shuffleI2L.at(t)); | |
| if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions()) | |
| || !isIdenticalReshape(s->getReshapeDimensions())) | |
| { | |
| break; | |
| } | |
| if (isTranspose(s->getFirstTranspose())) | |
| { | |
| needTranspose = !needTranspose; | |
| } | |
| if (isTranspose(s->getSecondTranspose())) | |
| { | |
| needTranspose = !needTranspose; | |
| } | |
| t = shuffleL2O.at(s); | |
| } | |
| return t; | |
| }; | |
| // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose | |
| std::unordered_map<nvinfer1::IConstantLayer*, bool> constantLayerToSparse; | |
| for (auto& o2l : constO2L) | |
| { | |
| // If need to transpose the weights of the Constant layer. | |
| // Need to transpose by default due to semantic difference. | |
| bool needTranspose{true}; | |
| ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); | |
| if (matmulI2L.find(t) == matmulI2L.end()) | |
| { | |
| continue; | |
| } | |
| // check MatMul params... | |
| IMatrixMultiplyLayer* mm = static_cast<nvinfer1::IMatrixMultiplyLayer*>(matmulI2L.at(t)); | |
| bool const twoInputs = mm->getNbInputs() == 2; | |
| bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions()); | |
| bool const isSimple = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE | |
| && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR; | |
| if (!(twoInputs && all2D && isSimple)) | |
| { | |
| continue; | |
| } | |
| if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE) | |
| { | |
| needTranspose = !needTranspose; | |
| } | |
| constantLayerToSparse.insert({static_cast<IConstantLayer*>(o2l.second), needTranspose}); | |
| } | |
| // 3. Finally, sparsify the weights | |
| auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) { | |
| Dims dims = layer->getOutput(0)->getDimensions(); | |
| ASSERT(dims.nbDims == 2); | |
| int32_t const idxN = needTranspose ? 1 : 0; | |
| int32_t const n = dims.d[idxN]; | |
| int32_t const k = dims.d[1 - idxN]; | |
| sparseWeights.emplace_back(); | |
| std::vector<int8_t>& spw = sparseWeights.back(); | |
| Weights w = layer->getWeights(); | |
| DataType const dtype = w.type; | |
| ASSERT(dtype == nvinfer1::DataType::kFLOAT | |
| || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored. | |
| if (needTranspose) | |
| { | |
| if (dtype == nvinfer1::DataType::kFLOAT) | |
| { | |
| spw.resize(w.count * sizeof(float)); | |
| transpose2DWeights<float>(spw.data(), w.values, k, n); | |
| } | |
| else if (dtype == nvinfer1::DataType::kHALF) | |
| { | |
| spw.resize(w.count * sizeof(half_float::half)); | |
| transpose2DWeights<half_float::half>(spw.data(), w.values, k, n); | |
| } | |
| w.values = spw.data(); | |
| std::vector<int8_t> tmpW; | |
| sparsify(w, n, 1, tmpW); | |
| if (dtype == nvinfer1::DataType::kFLOAT) | |
| { | |
| transpose2DWeights<float>(spw.data(), tmpW.data(), n, k); | |
| } | |
| else if (dtype == nvinfer1::DataType::kHALF) | |
| { | |
| transpose2DWeights<half_float::half>(spw.data(), tmpW.data(), n, k); | |
| } | |
| } | |
| else | |
| { | |
| sparsify(w, n, 1, spw); | |
| } | |
| w.values = spw.data(); | |
| layer->setWeights(w); | |
| }; | |
| for (auto& l : constantLayerToSparse) | |
| { | |
| sparsifyConstantWeights(l.first, l.second); | |
| } | |
| } | |
| template <typename L> | |
| void setSparseWeights(L& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights) | |
| { | |
| auto weights = l.getKernelWeights(); | |
| sparsify(weights, k, trs, sparseWeights); | |
| weights.values = sparseWeights.data(); | |
| l.setKernelWeights(weights); | |
| } | |
| // Explicit instantiation | |
| template void setSparseWeights<IConvolutionLayer>( | |
| IConvolutionLayer& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights); | |
| void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights) | |
| { | |
| for (int32_t l = 0; l < network.getNbLayers(); ++l) | |
| { | |
| auto* layer = network.getLayer(l); | |
| auto const t = layer->getType(); | |
| if (t == nvinfer1::LayerType::kCONVOLUTION) | |
| { | |
| auto& conv = *static_cast<IConvolutionLayer*>(layer); | |
| auto const& dims = conv.getKernelSizeNd(); | |
| ASSERT(dims.nbDims == 2 || dims.nbDims == 3); | |
| auto const k = conv.getNbOutputMaps(); | |
| auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies<int32_t>()); | |
| sparseWeights.emplace_back(); | |
| setSparseWeights(conv, k, trs, sparseWeights.back()); | |
| } | |
| } | |
| sparsifyMatMulKernelWeights(network, sparseWeights); | |
| sample::gLogVerbose << "--sparsity=force pruned " << sparseWeights.size() << " weights to be sparsity pattern." | |
| << std::endl; | |
| sample::gLogVerbose << "--sparsity=force has been deprecated. Please use <polygraphy surgeon prune> to rewrite the " | |
| "weights to a sparsity pattern and then run with --sparsity=enable" | |
| << std::endl; | |
| } | |
| void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights) | |
| { | |
| switch (weights.type) | |
| { | |
| case DataType::kFLOAT: | |
| sparsify(static_cast<float const*>(weights.values), weights.count, k, trs, sparseWeights); | |
| break; | |
| case DataType::kHALF: | |
| sparsify(static_cast<half_float::half const*>(weights.values), weights.count, k, trs, sparseWeights); | |
| break; | |
| case DataType::kBF16: | |
| sparsify(static_cast<BFloat16 const*>(weights.values), weights.count, k, trs, sparseWeights); | |
| break; | |
| case DataType::kINT8: | |
| case DataType::kINT32: | |
| case DataType::kUINT8: | |
| case DataType::kBOOL: | |
| case DataType::kINT4: | |
| case DataType::kFP8: | |
| case DataType::kINT64: | |
| case DataType::kFP4: ASSERT(false && "Unsupported data type"); | |
| case DataType::kE8M0: ASSERT(false && "E8M0 is not supported"); | |
| } | |
| } | |
| template <typename T> | |
| void print(std::ostream& os, T v) | |
| { | |
| os << v; | |
| } | |
| void print(std::ostream& os, int8_t v) | |
| { | |
| os << static_cast<int32_t>(v); | |
| } | |
| void print(std::ostream& os, __half v) | |
| { | |
| os << static_cast<float>(v); | |
| } | |
| void print(std::ostream& os, __nv_fp8_e4m3 v) | |
| { | |
| os << static_cast<float>(v); | |
| } | |
| int32_t dataOffsetFromDims(int64_t v, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv) | |
| { | |
| int32_t dataOffset = 0; | |
| for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) | |
| { | |
| int32_t dimVal = v % dims.d[dimIndex]; | |
| if (dimIndex == vectorDim) | |
| { | |
| dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; | |
| } | |
| else | |
| { | |
| dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv); | |
| } | |
| v /= dims.d[dimIndex]; | |
| ASSERT(v >= 0); | |
| } | |
| return dataOffset; | |
| } | |
| template <typename T> | |
| void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv) | |
| { | |
| auto const vol = volume(dims); | |
| T const* typedBuffer = static_cast<T const*>(buffer); | |
| for (int64_t v = 0; v < vol; ++v) | |
| { | |
| int32_t dataOffset = dataOffsetFromDims(v, dims, strides, vectorDim, spv); | |
| if (v > 0) | |
| { | |
| os << separator; | |
| } | |
| print(os, typedBuffer[dataOffset]); | |
| } | |
| } | |
| void dumpInt4Buffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv) | |
| { | |
| auto const vol = volume(dims); | |
| uint8_t const* typedBuffer = static_cast<uint8_t const*>(buffer); | |
| for (int64_t v = 0; v < vol; ++v) | |
| { | |
| int32_t dataOffset = dataOffsetFromDims(v, dims, strides, vectorDim, spv); | |
| if (v > 0) | |
| { | |
| os << separator; | |
| } | |
| auto value = typedBuffer[dataOffset / 2]; | |
| if (dataOffset % 2 == 0) | |
| { | |
| // Cast to int8_t before right shift, so right-shift will sign-extend. | |
| // Left shift on int8_t can be undefined behaviour, must perform left shift on uint8_t. | |
| os << (static_cast<int8_t>(value << 4) >> 4); | |
| } | |
| else | |
| { | |
| os << (static_cast<int8_t>(value) >> 4); | |
| } | |
| } | |
| } | |
| // Explicit instantiation | |
| template void dumpBuffer<bool>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<int32_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<int8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<float>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<__half>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<BFloat16>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, | |
| Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<uint8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template void dumpBuffer<int64_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, | |
| Dims const& strides, int32_t vectorDim, int32_t spv); | |
| template <typename T> | |
| void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights) | |
| { | |
| auto const c = count / (k * trs); | |
| sparseWeights.resize(count * sizeof(T)); | |
| auto* sparseValues = reinterpret_cast<T*>(sparseWeights.data()); | |
| constexpr int32_t window = 4; | |
| constexpr int32_t nonzeros = 2; | |
| int32_t const crs = c * trs; | |
| auto const getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * trs + rsi; }; | |
| for (int64_t ki = 0; ki < k; ++ki) | |
| { | |
| for (int64_t rsi = 0; rsi < trs; ++rsi) | |
| { | |
| int32_t w = 0; | |
| int32_t nz = 0; | |
| for (int64_t ci = 0; ci < c; ++ci) | |
| { | |
| auto const index = getIndex(ki, ci, rsi); | |
| if (nz < nonzeros) | |
| { | |
| sparseValues[index] = values[index]; | |
| ++nz; | |
| } | |
| else | |
| { | |
| sparseValues[index] = 0; | |
| } | |
| if (++w == window) | |
| { | |
| w = 0; | |
| nz = 0; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| // Explicit instantiation | |
| template void sparsify<float>( | |
| float const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights); | |
| template void sparsify<half_float::half>( | |
| half_float::half const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights); | |
| template <typename T> | |
| void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n) | |
| { | |
| ASSERT(dst != src); | |
| T* tdst = reinterpret_cast<T*>(dst); | |
| T const* tsrc = reinterpret_cast<T const*>(src); | |
| for (int32_t mi = 0; mi < m; ++mi) | |
| { | |
| for (int32_t ni = 0; ni < n; ++ni) | |
| { | |
| int32_t const isrc = mi * n + ni; | |
| int32_t const idst = ni * m + mi; | |
| tdst[idst] = tsrc[isrc]; | |
| } | |
| } | |
| } | |
| // Explicit instantiation | |
| template void transpose2DWeights<float>(void* dst, void const* src, int32_t const m, int32_t const n); | |
| template void transpose2DWeights<half_float::half>(void* dst, void const* src, int32_t const m, int32_t const n); | |
| template <typename T, typename std::enable_if_t<std::is_integral_v<T>, bool>> | |
| void fillBuffer(void* buffer, int64_t volume, int32_t min, int32_t max) | |
| { | |
| T* typedBuffer = static_cast<T*>(buffer); | |
| std::default_random_engine engine; | |
| std::uniform_int_distribution<int32_t> distribution(min, max); | |
| auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); }; | |
| std::generate(typedBuffer, typedBuffer + volume, generator); | |
| } | |
| template <typename T, typename std::enable_if_t<!std::is_integral_v<T>, bool>> | |
| void fillBuffer(void* buffer, int64_t volume, float min, float max) | |
| { | |
| T* typedBuffer = static_cast<T*>(buffer); | |
| std::default_random_engine engine; | |
| std::uniform_real_distribution<float> distribution(min, max); | |
| auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); }; | |
| std::generate(typedBuffer, typedBuffer + volume, generator); | |
| } | |
| // Explicit instantiation | |
| template void fillBuffer<bool>(void* buffer, int64_t volume, int32_t min, int32_t max); | |
| template void fillBuffer<int32_t>(void* buffer, int64_t volume, int32_t min, int32_t max); | |
| template void fillBuffer<int8_t>(void* buffer, int64_t volume, int32_t min, int32_t max); | |
| template void fillBuffer<float>(void* buffer, int64_t volume, float min, float max); | |
| template void fillBuffer<__half>(void* buffer, int64_t volume, float min, float max); | |
| template void fillBuffer<BFloat16>(void* buffer, int64_t volume, float min, float max); | |
| template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, float min, float max); | |
| template void fillBuffer<uint8_t>(void* buffer, int64_t volume, int32_t min, int32_t max); | |
| template void fillBuffer<int64_t>(void* buffer, int64_t volume, int32_t min, int32_t max); | |
| bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) | |
| { | |
| auto const splitPattern = splitToStringVec(pattern, '*', 1); | |
| // If there is no wildcard, return if the two strings match exactly. | |
| if (splitPattern.size() == 1) | |
| { | |
| return pattern == target; | |
| } | |
| // Otherwise, target must follow prefix+anything+postfix pattern. | |
| return target.size() >= (splitPattern[0].size() + splitPattern[1].size()) && target.find(splitPattern[0]) == 0 | |
| && target.rfind(splitPattern[1]) == (target.size() - splitPattern[1].size()); | |
| } | |
| } // namespace sample | |