File size: 44,820 Bytes
38fb1f6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 | /*
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NV_INFER_RUNTIME_PLUGIN_H
#define NV_INFER_RUNTIME_PLUGIN_H
#define NV_INFER_INTERNAL_INCLUDE 1
#include "NvInferPluginBase.h"
#undef NV_INFER_INTERNAL_INCLUDE
//!
//! \file NvInferRuntimePlugin.h
//!
//! This file contains common definitions, data structures and interfaces that relate to plugins and are shared
//! between the standard and safe runtime.
//!
//! \warning Do not directly include this file. Instead include NvInferRuntime.h
//!
//!
//! \namespace nvinfer1
//!
//! \brief The TensorRT API version 1 namespace.
//!
namespace nvinfer1
{
enum class TensorFormat : int32_t;
namespace v_1_0
{
class IGpuAllocator;
} // namespace v_1_0
using IGpuAllocator = v_1_0::IGpuAllocator;
//!
//! \brief PluginFormat is reserved for backward compatibility.
//!
//! \see IPluginV2::supportsFormat()
//!
using PluginFormat = TensorFormat;
//!
//! \brief Bit at the plugin version to identify that it is a plugin.
//!
static constexpr int32_t kPLUGIN_VERSION_PYTHON_BIT = 0x40;
//!
//! \struct PluginTensorDesc
//!
//! \brief Fields that a plugin might see for an input or output.
//!
//! Scale is only valid when data type is DataType::kINT8. TensorRT will set
//! the value to -1.0F if it is invalid.
//!
//! \see IPluginV2IOExt::supportsFormatCombination
//! \see IPluginV2IOExt::configurePlugin
//!
struct PluginTensorDesc
{
//! Dimensions.
Dims dims;
//! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
DataType type;
//! Tensor format.
TensorFormat format;
//! Scale for INT8 data type.
float scale;
};
//!
//! \struct PluginVersion
//!
//! \brief Definition of plugin versions.
//!
//! Tag for plug-in versions. Used in upper byte of getTensorRTVersion().
//!
//! \deprecated Deprecated in TensorRT 10.10. PluginVersion is used only in relation to IPluginV2-descendent plugin
//! interfaces, which are all deprecated.
//!
enum class PluginVersion : uint8_t
{
//! IPluginV2
kV2 TRT_DEPRECATED_ENUM = 0,
//! IPluginV2Ext
kV2_EXT TRT_DEPRECATED_ENUM = 1,
//! IPluginV2IOExt
kV2_IOEXT TRT_DEPRECATED_ENUM = 2,
//! IPluginV2DynamicExt
kV2_DYNAMICEXT TRT_DEPRECATED_ENUM = 3,
//! IPluginV2DynamicExt-based Python plugins
kV2_DYNAMICEXT_PYTHON TRT_DEPRECATED_ENUM = kPLUGIN_VERSION_PYTHON_BIT | 3
};
//!
//! \enum PluginCreatorVersion
//!
//! \brief Enum to identify version of the plugin creator.
//!
//! \deprecated Deprecated in TensorRT 10.10. PluginCreatorVersion is used only in relation to plugin creators based
//! off IPluginCreator, which is deprecated.
//!
enum class PluginCreatorVersion : int32_t
{
//! IPluginCreator
kV1 TRT_DEPRECATED_ENUM = 0,
//! IPluginCreator-based Python plugin creators
kV1_PYTHON TRT_DEPRECATED_ENUM = kPLUGIN_VERSION_PYTHON_BIT
};
//!
//! \class IPluginV2
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. When
//! combined with IPluginCreator it provides a mechanism to register plugins and
//! look up the Plugin Registry during de-serialization.
//!
//! \see IPluginCreator
//! \see IPluginRegistry
//!
//! \deprecated Deprecated in TensorRT 8.5. Implement IPluginV3 instead.
//!
class TRT_DEPRECATED IPluginV2
{
public:
//!
//! \brief Return the API version with which this plugin was built.
//!
//! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
//! plugins.
//!
//! \return The TensorRT version in the format (major * 100 + minor) * 100 + patch.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
//!
virtual int32_t getTensorRTVersion() const noexcept
{
return NV_TENSORRT_VERSION;
}
//!
//! \brief Return the plugin type. Should match the plugin name returned by the corresponding plugin creator
//!
//! \see IPluginCreator::getPluginName()
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual AsciiChar const* getPluginType() const noexcept = 0;
//!
//! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator
//!
//! \see IPluginCreator::getPluginVersion()
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual AsciiChar const* getPluginVersion() const noexcept = 0;
//!
//! \brief Get the number of outputs from the layer.
//!
//! \return The number of outputs, which is a positive integer.
//!
//! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
//! prior to any call to initialize().
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual int32_t getNbOutputs() const noexcept = 0;
//!
//! \brief Get the dimension of an output tensor.
//!
//! \param index The index of the output tensor. Will lie in the valid range (between 0 and getNbOutputs()-1
//! inclusive).
//! \param inputs The input tensor dimensions. Will be the start address of a Dims array of length nbInputDims.
//! \param nbInputDims The number of input tensors. Will be a non-negative integer.
//!
//! \return The output tensor dimensions if the index is in the valid range.
//! An invalid value of Dims{-1, {}} must be returned if the index is not in the valid range.
//!
//! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
//! prior to any call to initialize().
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
//! \note In any non-IPluginV2DynamicExt plugin, batch size must not be included in the returned dimensions,
//! even if the plugin is expected to be run in a network with explicit batch mode enabled.
//! Please see the TensorRT Developer Guide for more details on how plugin inputs and outputs behave.
//!
virtual Dims getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept = 0;
//!
//! \brief Check format support.
//!
//! \param type DataType requested.
//! \param format PluginFormat requested.
//!
//! \return true if the plugin supports the type-format combination.
//!
//! This function is called by the implementations of INetworkDefinition, IBuilder, and
//! safe::ICudaEngine/ICudaEngine. In particular, it is called when creating an engine and when deserializing an
//! engine.
//!
//! \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
//! will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use PluginV2IOExt
//! or PluginV2DynamicExt for other PluginFormats.
//!
//! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0;
//!
//! \brief Configure the layer.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of its weights, dimensions, and maximum batch size.
//!
//! \param inputDims The input tensor dimensions. Will be the start address of a Dims array of length nbInputs.
//! \param nbInputs The number of inputs. Will be a non-negative integer.
//! \param outputDims The output tensor dimensions. Will be the start address of a Dims array of length nbOutputs.
//! \param nbOutputs The number of outputs. Will be a positive integer identical to the return value of
//! getNbOutputs().
//! \param type The data type selected for the engine.
//! \param format The format selected for the engine.
//! \param maxBatchSize The maximum batch size. Will be a positive integer.
//!
//! The dimensions passed here do not include the outermost batch size (i.e. for 2D image networks, they will be
//! 3-dimensional CHW dimensions).
//!
//! \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
//! will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use PluginV2IOExt
//! or PluginV2DynamicExt for other PluginFormats.
//!
//! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
//!
//! \see clone()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual void configureWithFormat(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
DataType type, PluginFormat format, int32_t maxBatchSize) noexcept
= 0;
//!
//! \brief Initialize the layer for execution. This is called when the engine is created.
//!
//! \return 0 for success, else non-zero (which will cause engine termination).
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when using multiple
//! execution contexts using this plugin.
//!
virtual int32_t initialize() noexcept = 0;
//!
//! \brief Release resources acquired during plugin layer initialization. This is called when the engine is
//! destroyed.
//!
//! \see initialize()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when using multiple
//! execution contexts using this plugin. However, TensorRT will not call this method from
//! two threads simultaneously on a given clone of a plugin.
//!
virtual void terminate() noexcept = 0;
//!
//! \brief Find the workspace size required by the layer.
//!
//! This function is called during engine startup, after initialize(). The workspace size returned must be
//! sufficient for any batch size up to the maximum.
//!
//! \param maxBatchSize The maximum batch size, which will be a positive integer.
//!
//! \return The workspace size in bytes, i.e. the device memory size that the plugin requires for its internal
//! computations.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept = 0;
//!
//! \brief Execute the layer.
//!
//! \param batchSize The number of inputs in the batch.
//! \param inputs The memory for the input tensors. Will be an array of device addresses corresponding to input
//! tensors of length nbInputs, where nbInputs is the second parameter passed to configureWithFormat().
//! The i-th input tensor will have the dimensions inputDims[i], where inputDims is the first parameter
//! that was passed to configureWithFormat().
//! \param outputs The memory for the output tensors. Will be an array of device addresses corresponding to output
//! tensors of length getNbOutputs().
//! \param workspace Workspace for execution. Will be the start address of a device buffer whose length will be at
//! least getWorkspaceSize(batchSize).
//! \param stream The stream in which to execute the kernels. This will be a valid CUDA stream.
//!
//! \return 0 for success, else non-zero (which will cause engine termination).
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when multiple execution contexts are used during runtime.
//!
virtual int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace,
cudaStream_t stream) noexcept
= 0;
//!
//! \brief Find the size of the serialization buffer required to store the plugin configuration in a binary file.
//!
//! \return The size of the serialization buffer in bytes.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual size_t getSerializationSize() const noexcept = 0;
//!
//! \brief Serialize the layer.
//!
//! \param buffer A pointer to a host buffer to serialize data. Size of buffer will be at least as large as the
//! value returned by getSerializationSize.
//!
//! \see getSerializationSize()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void serialize(void* buffer) const noexcept = 0;
//!
//! \brief Destroy the plugin object. This will be called when the network, builder or engine is destroyed.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void destroy() noexcept = 0;
//!
//! \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object with
//! these parameters.
//!
//! The TensorRT runtime calls clone() to clone the plugin when an execution context is created for an engine,
//! after the engine has been created. The runtime does not call initialize() on the cloned plugin,
//! so the cloned plugin must be created in an initialized state.
//!
//! \return A cloned plugin object in an initialized state with the same parameters as the current object.
//! nullptr must be returned if the cloning fails, e.g. because of resource exhaustion.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when creating multiple
//! execution contexts.
//!
virtual IPluginV2* clone() const noexcept = 0;
//!
//! \brief Set the namespace that this plugin object belongs to. Ideally, all plugin
//! objects from the same plugin library must have the same namespace.
//!
//! \param pluginNamespace The namespace for the plugin object.
//!
//! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;
//!
//! \brief Return the namespace of the plugin object.
//!
//! \return The namespace string that was passed to setPluginNamespace(), possibly after truncation to 1024 bytes
//! if a longer string was passed. An empty string must be returned as default value.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
// @cond SuppressDoxyWarnings
IPluginV2() = default;
virtual ~IPluginV2() noexcept = default;
// @endcond
protected:
// @cond SuppressDoxyWarnings
IPluginV2(IPluginV2 const&) = default;
IPluginV2(IPluginV2&&) = default;
IPluginV2& operator=(IPluginV2 const&) & = default;
IPluginV2& operator=(IPluginV2&&) & = default;
// @endcond
};
//!
//! \class IPluginV2Ext
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. This
//! interface provides additional capabilities to the IPluginV2 interface by
//! supporting different output data types and broadcast across batches.
//!
//! \see IPluginV2
//!
//! \deprecated Deprecated in TensorRT 8.5. Implement IPluginV3 instead.
//!
class TRT_DEPRECATED IPluginV2Ext : public IPluginV2
{
public:
//!
//! \brief Return the DataType of the plugin output at the requested index.
//!
//! \param index The output tensor index in the valid range between 0 and getNbOutputs()-1.
//! \param inputTypes The data types of the input tensors, stored in an array of length nbInputs.
//! \param nbInputs The number of input tensors. Will be a non-negative integer.
//!
//! \return The data type of the output tensor with the provided index if the input tensors have the data types
//! provided in inputTypes, provided the output tensor index is in the valid range. DataType::kFLOAT must be
//! returned if the index is not in the valid range.
//!
//! The default behavior must be to return the type of the first input, or DataType::kFLOAT if the layer has no
//! inputs. The returned data type must have a format that is supported by the plugin.
//!
//! \see supportsFormat()
//!
//! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual nvinfer1::DataType getOutputDataType(
int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
= 0;
//!
//! \brief Return true if the output tensor is broadcast across a batch.
//!
//! \param outputIndex The index of the output tensor, which will be in the valid range between 0 and
//! nbOutputs()-1.
//! \param inputIsBroadcasted A boolean array of length nbInputs. The i-th element will be true if and only if
//! the tensor for the ith input is broadcast across a batch.
//! \param nbInputs The number of inputs. Will be a non-negative integer.
//!
//! The values in inputIsBroadcasted refer to broadcasting at the semantic level,
//! i.e. are unaffected by whether method canBroadcastInputAcrossBatch requests
//! physical replication of the values.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
//! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
//!
TRT_DEPRECATED virtual bool isOutputBroadcastAcrossBatch(
int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept
= 0;
//!
//! \brief Return true if the plugin can use an input tensor that is broadcast across batch without replication.
//!
//! \param inputIndex Index of input that could be broadcast. Will be in the valid range between 0 and
//! nbInputs - 1 where nbInputs is the maximum number of input tensors supported by this plugin.
//!
//! \return true if the index is in the valid range and the plugin is able to broadcast a single copy of this
//! input tensor across the batch. False otherwise.
//!
//! For each input whose tensor is semantically broadcast across a batch,
//! TensorRT calls this method before calling configurePlugin.
//! If canBroadcastInputAcrossBatch returns true, TensorRT will not replicate the input tensor;
//! i.e., there will be a single copy that the plugin must share across the batch.
//! If it returns false, TensorRT will replicate the input tensor
//! so that it appears like a non-broadcasted tensor.
//!
//! This method is called only for inputs that can be broadcast.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
//! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
//!
TRT_DEPRECATED virtual bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept = 0;
//!
//! \brief Configure the layer with input and output data types.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of its weights, dimensions, data types and maximum batch size.
//!
//! \param inputDims The input tensor dimensions. Will be an array of length nbInputs.
//! \param nbInputs The number of inputs. Will be a non-negative integer.
//! \param outputDims The output tensor dimensions. Will be an array of length nbOutputs.
//! \param nbOutputs The number of outputs. Will be a positive integer.
//! \param inputTypes The data types selected for the plugin inputs. Will be an array of length nbInputs.
//! \param outputTypes The data types selected for the plugin outputs. Will be an array of length nbOutputs.
//! \param inputIsBroadcast True for each input that the plugin must broadcast across the batch.
//! Will be an array of length nbInputs.
//! \param outputIsBroadcast True for each output that TensorRT will broadcast across the batch.
//! Will be an array of length nbOutputs.
//! \param floatFormat The format selected for the engine for the floating point inputs/outputs.
//! \param maxBatchSize The maximum batch size. Will be a positive integer.
//!
//! The dimensions passed here do not include the outermost batch size (i.e. for 2D image networks, they will be
//! 3-dimensional CHW dimensions). When inputIsBroadcast or outputIsBroadcast is true, the outermost batch size for
//! that input or output must be treated as if it is one.
//! Index 'i' of inputIsBroadcast is true only if the input is semantically broadcast across the batch and
//! calling canBroadcastInputAcrossBatch with argument 'i' returns true.
//! Index 'i' of outputIsBroadcast is true only if calling isOutputBroadcastAcrossBatch with argument 'i'
//! returns true.
//!
//! \warning for the floatFormat field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and
//! PluginFormat::kCHW32 will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use
//! PluginV2IOExt or PluginV2DynamicExt for other PluginFormats.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual void configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast,
bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept
= 0;
IPluginV2Ext() = default;
~IPluginV2Ext() override = default;
//!
//! \brief Attach the plugin object to an execution context and grant the plugin the access to some context
//! resources.
//!
//! \param cudnn The cuDNN context handle of the execution context. Will be a valid cuDNN context handle, or
//! nullptr if TacticSource::kCUDNN is disabled.
//! \param cublas The cuBLAS context handle of the execution context. Will be a valid cuBLAS context handle, or
//! nullptr if TacticSource::kCUBLAS is disabled.
//! \param allocator The allocator used by the execution context
//!
//! This function is called automatically for each plugin when a new execution context is created. If the context
//! was created without resources, this method is not called until the resources are assigned. It is also called if
//! new resources are assigned to the context.
//!
//! If the plugin needs per-context resource, it can be allocated here.
//! The plugin can also get context-owned cuDNN and cuBLAS context here.
//!
//! \note The TacticSource::kCUDNN and TacticSource::kCUBLAS flag is disabled by default.
//! The allocator pointer is unique to each building or execution context instance having overlapping lifetimes.
//! It can be used as a key to manage resources across plugin instances sharing the same context.
//! Plugins attached to different contexts will have different handles as their execution will not overlap.
//!
//! \see TacticSources
//! \see getPluginCudnnHandle(void* executionContextIdentifier)
//! \see getPluginCublasHandle(void* excecutionContextIdentifier)
//!
//! \note In the automotive safety context, the cuDNN and cuBLAS parameters will be nullptr because cuDNN and cuBLAS
//! are not used by the safe runtime.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void attachToContext(
cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*allocator*/) noexcept
{
}
//!
//! \brief Detach the plugin object from its execution context.
//!
//! This function is called automatically for each plugin when an execution context is destroyed or the context
//! resources are unassigned from the context.
//!
//! If the plugin owns per-context resource, it can be released here.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void detachFromContext() noexcept {}
//!
//! \brief Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin
//! object with these parameters. If the source plugin is pre-configured with configurePlugin(), the returned object
//! must also be pre-configured. The returned object must allow attachToContext() with a new execution context.
//! Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object
//! (e.g. via ref-counting) to avoid duplication.
//!
//! \return A pointer to a cloned plugin object if cloning was successful, otherwise nullptr.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
IPluginV2Ext* clone() const noexcept override = 0;
protected:
// @cond SuppressDoxyWarnings
IPluginV2Ext(IPluginV2Ext const&) = default;
IPluginV2Ext(IPluginV2Ext&&) = default;
IPluginV2Ext& operator=(IPluginV2Ext const&) & = default;
IPluginV2Ext& operator=(IPluginV2Ext&&) & = default;
// @endcond
//!
//! \brief Return the API version with which this plugin was built. The
//! upper byte reserved by TensorRT and is used to differentiate this from IPluginV2.
//!
//! \return In the lower three bytes, the TensorRT version in the format
//! (major * 100 + minor) * 100 + patch.
//! In the upper byte, the value 1.
//!
//! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
//! plugins.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
//!
int32_t getTensorRTVersion() const noexcept override
{
return static_cast<int32_t>((static_cast<uint32_t>(PluginVersion::kV2_EXT) << 24U)
| (static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU));
}
//!
//! \brief Derived classes must not implement this. In a C++11 API it would be override final.
//!
//! IPluginV2Ext::configureWithFormat() is a NOP operation for all classes derived from IPluginV2Ext.
//! These classes call configurePlugin() instead.
//!
void configureWithFormat(Dims const* /*inputDims*/, int32_t /*nbInputs*/, Dims const* /*outputDims*/,
int32_t /*nbOutputs*/, DataType /*type*/, PluginFormat /*format*/, int32_t /*maxBatchSize*/) noexcept override
{
}
};
//!
//! \class IPluginV2IOExt
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. This interface provides additional
//! capabilities to the IPluginV2Ext interface by extending different I/O data types and tensor formats.
//!
//! \see IPluginV2Ext
//!
//! \deprecated Deprecated in TensorRT 10.0. Implement IPluginV3 instead.
//!
class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext
{
public:
//!
//! \brief Configure the layer.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of the provided I/O PluginTensorDesc.
//!
//! \param in The input tensors attributes that are used for configuration.
//! \param nbInput Number of input tensors.
//! \param out The output tensors attributes that are used for configuration.
//! \param nbOutput Number of output tensors.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual void configurePlugin(
PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out, int32_t nbOutput) noexcept
= 0;
//!
//! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
//!
//! For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1).
//! Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs.
//!
//! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
//! by inOut[pos].format and inOut[pos].type. The override must return true if that format/datatype at inOut[pos]
//! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
//! make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values
//! that the plugin supports. The override must not inspect inOut[pos+1..nbInputs+nbOutputs-1],
//! which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only.
//!
//! Some examples:
//!
//! * A definition for a plugin that supports only FP16 NCHW:
//!
//! return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF;
//!
//! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
//! and FP32 NCHW for its single output:
//!
//! return inOut.format[pos] == TensorFormat::kLINEAR &&
//! (inOut.type[pos] == (pos < 2 ? DataType::kHALF : DataType::kFLOAT));
//!
//! * A definition for a "polymorphic" plugin with two inputs and one output that supports
//! any format or type, but the inputs and output must have the same format and type:
//!
//! return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]);
//!
//! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual bool supportsFormatCombination(
int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) const noexcept
= 0;
// @cond SuppressDoxyWarnings
IPluginV2IOExt() = default;
~IPluginV2IOExt() override = default;
// @endcond
protected:
// @cond SuppressDoxyWarnings
IPluginV2IOExt(IPluginV2IOExt const&) = default;
IPluginV2IOExt(IPluginV2IOExt&&) = default;
IPluginV2IOExt& operator=(IPluginV2IOExt const&) & = default;
IPluginV2IOExt& operator=(IPluginV2IOExt&&) & = default;
// @endcond
//!
//! \brief Return the API version with which this plugin was built. The upper byte is reserved by TensorRT and is
//! used to differentiate this from IPluginV2 and IPluginV2Ext.
//!
//! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
//! plugins.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
//!
int32_t getTensorRTVersion() const noexcept override
{
return static_cast<int32_t>((static_cast<uint32_t>(PluginVersion::kV2_IOEXT) << 24U)
| (static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU));
}
private:
// Following are obsolete base class methods, and must not be implemented or used.
//!
//! \brief Set plugin configuration.
//!
void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
bool const*, PluginFormat, int32_t) noexcept final
{
}
//!
//! \brief Check if provided data type is supported.
//!
bool supportsFormat(DataType, PluginFormat) const noexcept final
{
return false;
}
};
namespace v_1_0
{
class TRT_DEPRECATED IPluginCreator : public IPluginCreatorInterface
{
public:
//!
//! \brief Return the plugin name.
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
//! the NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual AsciiChar const* getPluginName() const noexcept = 0;
//!
//! \brief Return the plugin version.
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
//! the NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual AsciiChar const* getPluginVersion() const noexcept = 0;
//!
//! \brief Return a list of fields that need to be passed to createPlugin.
//!
//! \see PluginFieldCollection
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
//!
//! \brief Return a plugin object. Return nullptr in case of error.
//!
//! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
//! \param fc A pointer to a collection of fields needed for constructing the plugin.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual IPluginV2* createPlugin(AsciiChar const* name, PluginFieldCollection const* fc) noexcept = 0;
//!
//! \brief Called during deserialization of plugin layer. Return a plugin object.
//!
//! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
//! \param serialData The start address of a byte array with the serialized plugin representation.
//! \param serialLength The length in bytes of the byte array with the serialized plugin representation.
//!
//! \return A deserialized plugin object
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual IPluginV2* deserializePlugin(AsciiChar const* name, void const* serialData, size_t serialLength) noexcept
= 0;
//!
//! \brief Set the namespace of the plugin creator based on the plugin
//! library it belongs to. This can be set while registering the plugin creator.
//!
//! \param pluginNamespace A NULL-terminated namespace string of length 1024 or less, including the NULL terminator
//!
//! \see IPluginRegistry::registerCreator()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;
//!
//! \brief Return the namespace of the plugin creator object.
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
IPluginCreator() = default;
~IPluginCreator() override = default;
protected:
// @cond SuppressDoxyWarnings
IPluginCreator(IPluginCreator const&) = default;
IPluginCreator(IPluginCreator&&) = default;
IPluginCreator& operator=(IPluginCreator const&) & = default;
IPluginCreator& operator=(IPluginCreator&&) & = default;
// @endcond
public:
//!
//! \brief Return version information associated with this interface. Applications must not override this method.
//!
InterfaceInfo getInterfaceInfo() const noexcept override
{
return InterfaceInfo{"PLUGIN CREATOR_V1", 1, 0};
}
};
} // namespace v_1_0
//!
//! \class IPluginCreator
//!
//! \brief Plugin creator class for user implemented layers.
//!
//! \see IPlugin and IPluginFactory
//!
//! \deprecated Deprecated in TensorRT 10.0. Please implement IPluginCreatorV3One
//! along with IPluginV3 plugins instead.
//!
using IPluginCreator = v_1_0::IPluginCreator;
} // namespace nvinfer1
#endif // NV_INFER_RUNTIME_PLUGIN_H
|