G0-VLA / g0plus_dockerfile /docker-assets /data /TensorRT-10.13.0.35 /samples /python /efficientnet /infer.py
| # | |
| # SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| import os | |
| import sys | |
| import argparse | |
| import numpy as np | |
| import tensorrt as trt | |
| from cuda import cudart | |
| sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) | |
| import common | |
| from image_batcher import ImageBatcher | |
class TensorRTInfer:
    """
    Implements inference for the EfficientNet TensorRT engine.

    The constructor deserializes the engine, creates an execution context and allocates one device buffer per
    I/O tensor. ``infer`` then copies a batch to the device, executes the network and post-processes the scores.
    """

    def __init__(self, engine_path):
        """
        :param engine_path: The path to the serialized engine to load from disk.
        """
        # Load TRT engine
        self.logger = trt.Logger(trt.Logger.ERROR)
        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
            assert runtime
            self.engine = runtime.deserialize_cuda_engine(f.read())
        assert self.engine
        self.context = self.engine.create_execution_context()
        assert self.context

        # Setup I/O bindings
        self.inputs = []
        self.outputs = []
        self.allocations = []
        # Start from 0 so that an engine without any input tensor trips the batch-size assertion below instead
        # of raising AttributeError on a never-assigned attribute.
        self.batch_size = 0
        for i in range(self.engine.num_io_tensors):
            name = self.engine.get_tensor_name(i)
            is_input = self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
            # Resolve the numpy dtype once; it is needed for both the byte size and the binding record.
            np_dtype = np.dtype(trt.nptype(self.engine.get_tensor_dtype(name)))
            shape = self.engine.get_tensor_shape(name)
            if is_input:
                self.batch_size = shape[0]
            # Buffer size in bytes = item size * product of all dimensions.
            # NOTE(review): this presumably expects a static-shape engine (no -1 dimensions) -- confirm.
            size = np_dtype.itemsize
            for s in shape:
                size *= s
            allocation = common.cuda_call(cudart.cudaMalloc(size))
            binding = {
                "index": i,
                "name": name,
                "dtype": np_dtype,
                "shape": list(shape),
                "allocation": allocation,
            }
            # `allocations` keeps every buffer in tensor-index order; it is what gets handed to execute_v2().
            self.allocations.append(allocation)
            if is_input:
                self.inputs.append(binding)
            else:
                self.outputs.append(binding)

        assert self.batch_size > 0
        assert len(self.inputs) > 0
        assert len(self.outputs) > 0
        assert len(self.allocations) > 0

    def input_spec(self):
        """
        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
        :return: Two items, the shape of the input tensor and its (numpy) datatype.
        """
        first_input = self.inputs[0]
        return first_input["shape"], first_input["dtype"]

    def output_spec(self):
        """
        Get the specs for the output tensor of the network. Useful to prepare memory allocations.
        :return: Two items, the shape of the output tensor and its (numpy) datatype.
        """
        first_output = self.outputs[0]
        return first_output["shape"], first_output["dtype"]

    @staticmethod
    def _rank_predictions(output, top):
        """
        Turn a 2-D score array (one row per image) into the best class, its score and the top-N ranking.
        :param output: A 2-D numpy array of scores, indexed as [image, class].
        :param top: The number of ranked classes to keep; clamped to the number of classes available.
        :return: Three items: the maximum score class per row, the maximum score per row, and a two-element list
        holding the top-N class indices and the matching top-N scores, both in descending score order.
        """
        classes = np.argmax(output, axis=1)
        scores = np.max(output, axis=1)
        top = min(top, output.shape[1])
        # Sort once and reuse the ordering to gather the scores, rather than sorting the array a second time.
        top_classes = np.flip(np.argsort(output, axis=1), axis=1)[:, 0:top]
        top_scores = np.take_along_axis(output, top_classes, axis=1)
        return classes, scores, [top_classes, top_scores]

    def infer(self, batch, top=1):
        """
        Execute inference on a batch of images. The images should already be batched and preprocessed, as prepared by
        the ImageBatcher class. Memory copying to and from the GPU device will be performed here.
        :param batch: A numpy array holding the image batch.
        :param top: The number of classes to return as top_predictions, in descending order by their score. By default,
        setting to one will return the same as the maximum score class. Useful for Top-5 accuracy metrics in validation.
        :return: Three items, as numpy arrays for each batch image: The maximum score class, the corresponding maximum
        score, and a list of the top N classes and scores.
        :raises RuntimeError: If the TensorRT execution context reports that execution failed.
        """
        # Prepare the output data
        out_shape, out_dtype = self.output_spec()
        output = np.zeros(out_shape, out_dtype)

        # Process I/O and execute the network
        common.memcpy_host_to_device(
            self.inputs[0]["allocation"], np.ascontiguousarray(batch)
        )
        # execute_v2() signals failure through its boolean result; without this check a failed run would hand
        # back the zero-initialised host buffer as if it were real predictions.
        if not self.context.execute_v2(self.allocations):
            raise RuntimeError("TensorRT inference failed: execute_v2() returned False")
        common.memcpy_device_to_host(output, self.outputs[0]["allocation"])

        # Process the results
        return self._rank_predictions(output, top)
def main(args):
    """
    Run the engine over every batch of input images and print one result line per image.

    With ``args.top == 1`` each line holds the image, its best class and that class's score. Otherwise each line
    holds the image followed by the ``args.top`` best classes and then their ``args.top`` scores. Columns are
    separated by ``args.separator``.

    :param args: The parsed command line arguments; reads ``engine``, ``input``, ``preprocessor``, ``top`` and
    ``separator``.
    """
    trt_infer = TensorRTInfer(args.engine)
    batcher = ImageBatcher(
        args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor
    )
    for batch, images in batcher.get_batch():
        # Forward the requested top-N. With infer()'s default of top=1 the ranking arrays only ever have one
        # column, which made the assertion below fail for any --top value greater than 1.
        classes, scores, (top_classes, top_scores) = trt_infer.infer(batch, top=args.top)
        # Iterate over `images` rather than the result arrays: results are indexed by position in `images`.
        for i, image in enumerate(images):
            if args.top == 1:
                print(image, classes[i], scores[i], sep=args.separator)
                continue
            # infer() clamps top to the number of classes, so this only fires when more classes were requested
            # than the network outputs.
            assert args.top <= top_classes.shape[1]
            line = [image]
            line.extend(str(top_classes[i][t]) for t in range(args.top))
            line.extend(str(top_scores[i][t]) for t in range(args.top))
            print(args.separator.join(line))
if __name__ == "__main__":
    # Command line options, declared as (flags, keyword arguments) pairs and registered in this order so the
    # generated help text lists them the same way.
    cli_options = (
        (("-e", "--engine"), {"help": "The TensorRT engine to infer with"}),
        (
            ("-i", "--input"),
            {"help": "The input to infer, either a single image path, or a directory of images"},
        ),
        (
            ("-t", "--top"),
            {
                "default": 1,
                "type": int,
                "help": "The amount of top classes and scores to output per image, default: 1",
            },
        ),
        (
            ("-s", "--separator"),
            {
                "default": "\t",
                "help": "Separator to use between columns when printing the results, default: \\t",
            },
        ),
        (
            ("-p", "--preprocessor"),
            {
                "default": "V2",
                "choices": ["V1", "V1MS", "V2"],
                "help": "Select the image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2",
            },
        ),
    )
    parser = argparse.ArgumentParser()
    for flags, options in cli_options:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # --engine and --input are mandatory; show the usage and exit with status 1 when either one is missing.
    if not (args.engine and args.input):
        parser.print_help()
        print("\nThese arguments are required: --engine and --input")
        sys.exit(1)

    main(args)