| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
|
|
| import os |
| import sys |
| import six |
| import glob |
| import time |
| import yaml |
| import argparse |
| import cv2 |
| import numpy as np |
|
|
| import paddle |
| import paddle.version as paddle_version |
| from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version |
|
|
# Architectures whose TensorRT dynamic shapes are tuned offline: a first run
# collects a shape-range file, a rerun consumes it (see init_predictor).
TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
|
|
|
|
def check_version(version='2.2', installed=None):
    """Raise unless the installed PaddlePaddle is at least ``version``.

    Args:
        version (str): minimum required release such as '2.2', or the
            string 'develop' to require a source/develop build.
        installed (list | None): version components [major, minor, patch, rc]
            to check; defaults to the imported paddle's version. Exposed
            mainly for testing.

    Raises:
        Exception: when the installed version is older than ``version``, or
            a develop build is required but a release build is installed.
    """
    err = "PaddlePaddle version {} or higher is required, " \
        "or a suitable develop version is satisfied as well. \n" \
        "Please make sure the version is good with your code.".format(version)

    if installed is None:
        installed = [
            paddle_version.major, paddle_version.minor, paddle_version.patch,
            paddle_version.rc
        ]

    # A develop (source) build reports 0.0.0.0 and satisfies any requirement.
    if installed == ['0', '0', '0', '0']:
        return

    if version == 'develop':
        raise Exception("PaddlePaddle develop version is required!")

    required = version.split('.')

    # Compare component-wise, numerically whenever both parts are digits:
    # plain string comparison would rank '10' below '9'.
    for have, want in zip(installed, required):
        have_s, want_s = str(have), str(want)
        if have_s.isdigit() and want_s.isdigit():
            have_c, want_c = int(have_s), int(want_s)
        else:
            have_c, want_c = have_s, want_s
        if have_c > want_c:
            return
        if have_c < want_c:
            raise Exception(err)
|
|
|
|
def check_trt_version(version='8.2'):
    """Raise unless the runtime TensorRT is at least ``version``.

    The installed version tuple is queried from paddle's
    ``get_trt_runtime_version`` and compared component-wise against the
    required version string.
    """
    err = "TensorRT version {} or higher is required," \
        "Please make sure the version is good with your code.".format(version)
    required = [int(part) for part in version.split('.')]
    installed = get_trt_runtime_version()
    # zip truncates to the shorter sequence, matching a min-length loop.
    for have, want in zip(installed, required):
        if have > want:
            return
        if have < want:
            raise Exception(err)
|
|
|
|
| |
def decode_image(im_file, im_info):
    """Load an image and record its shape metadata.

    Args:
        im_file: either a path to an image file (decoded via OpenCV and
            converted BGR -> RGB) or an already-decoded HWC ndarray.
        im_info (dict): mutated in place with 'im_shape' (h, w) and a unit
            'scale_factor'.

    Returns:
        (image, im_info) tuple.
    """
    if isinstance(im_file, str):
        with open(im_file, 'rb') as fh:
            raw = fh.read()
        buf = np.frombuffer(raw, dtype='uint8')
        decoded = cv2.imdecode(buf, 1)
        image = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)
    else:
        image = im_file
    im_info['im_shape'] = np.array(image.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return image, im_info
|
|
|
|
class Resize(object):
    """Resize an HWC image to ``target_size``.

    Args:
        target_size (int | list): target [h, w]; an int is expanded to a
            square size.
        keep_ratio (bool): when True, scale isotropically so the short side
            reaches the small target without the long side exceeding the
            large target; otherwise stretch each axis independently.
        interp (int): OpenCV interpolation flag.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize ``im`` and update 'im_shape'/'scale_factor' in ``im_info``."""
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        # Stored as [scale_y, scale_x] to match 'im_shape' (h, w) ordering.
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """Return the (scale_y, scale_x) factors used to reach target_size."""
        origin_shape = im.shape[:2]
        if self.keep_ratio:
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            # Scale the short side up to the small target...
            im_scale = float(target_size_min) / float(im_size_min)
            # ...unless that would push the long side past the large target.
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
|
|
|
|
class Permute(object):
    """Transpose an image from HWC to CHW layout."""

    def __init__(self):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """Return (CHW image, unchanged im_info)."""
        chw = np.transpose(im, (2, 0, 1))
        return chw, im_info
|
|
|
|
class NormalizeImage(object):
    """Normalize an HWC image: optional 1/255 scaling, then per-channel
    mean/std whitening when ``norm_type`` is 'mean_std'.
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Return (normalized float32 image, unchanged im_info)."""
        result = im.astype(np.float32, copy=False)
        if self.is_scale:
            result *= 1.0 / 255.0
        if self.norm_type == 'mean_std':
            # Broadcast mean/std over H and W; last axis is the channel.
            channel_mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            channel_std = np.array(self.std)[np.newaxis, np.newaxis, :]
            result -= channel_mean
            result /= channel_std
        return result, im_info
|
|
|
|
class PadStride(object):
    """Zero-pad a CHW image so H and W become multiples of ``stride``.

    A stride <= 0 disables padding entirely.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """Return (padded image, unchanged im_info)."""
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info
        channels, height, width = im.shape
        # Round each spatial dim up to the next multiple of the stride.
        padded_h = int(np.ceil(float(height) / stride) * stride)
        padded_w = int(np.ceil(float(width) / stride) * stride)
        padded = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        padded[:, :height, :width] = im
        return padded, im_info
|
|
|
|
def preprocess(im, preprocess_ops):
    """Decode one image and run it through the preprocessing pipeline.

    Args:
        im: image path or decoded ndarray (forwarded to decode_image).
        preprocess_ops: sequence of callables taking and returning
            (image, im_info).

    Returns:
        (image, im_info) after all operators have been applied.
    """
    im_info = {
        'scale_factor': np.array([1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for op in preprocess_ops:
        im, im_info = op(im, im_info)
    return im, im_info
|
|
|
|
def parse_args():
    """Define and parse the command-line flags for the benchmark."""
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('--model_dir', type=str, help='directory of inference model')
    add('--run_mode', type=str, default='paddle', help='running mode')
    add('--batch_size', type=int, default=1, help='batch size')
    add('--image_dir',
        type=str,
        default='/paddle/data/DOTA_1024_ss/test1024/images',
        help='directory of test images')
    add('--warmup_iter', type=int, default=5, help='num of warmup iters')
    add('--total_iter', type=int, default=2000, help='num of total iters')
    add('--log_iter', type=int, default=50, help='num of log interval')
    add('--tuned_trt_shape_file',
        type=str,
        default='shape_range_info.pbtxt',
        help='dynamic shape range info')
    return parser.parse_args()
|
|
|
|
def init_predictor(FLAGS):
    """Build a paddle-inference predictor for an exported detection model.

    Reads ``infer_cfg.yml`` from FLAGS.model_dir for model metadata
    ('arch', 'min_subgraph_size', 'use_dynamic_shape'), configures GPU and
    optionally TensorRT, and returns (predictor, yml_conf).

    For archs in TUNED_TRT_DYNAMIC_MODELS without an existing shape-range
    file, this run only collects shape ranges; the script must be rerun to
    benchmark with the tuned shapes (see the printed message).
    """
    model_dir, run_mode, batch_size = FLAGS.model_dir, FLAGS.run_mode, FLAGS.batch_size
    yaml_file = os.path.join(model_dir, 'infer_cfg.yml')
    with open(yaml_file) as f:
        yml_conf = yaml.safe_load(f)

    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))

    # NOTE(review): first arg presumably the initial GPU memory pool in MB,
    # second the device id — confirm against the Paddle Inference API docs.
    config.enable_use_gpu(200, 0)
    config.switch_ir_optim(True)

    # run_mode values that select a TensorRT precision; any other run_mode
    # (e.g. 'paddle') skips the whole TRT branch below.
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }

    arch = yml_conf['arch']
    tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file)

    if run_mode in precision_map.keys():
        # Collect dynamic-shape ranges on the first run for tuned archs.
        if arch in TUNED_TRT_DYNAMIC_MODELS and not os.path.exists(
                tuned_trt_shape_file):
            print(
                'dynamic shape range info is saved in {}. After that, rerun the code'.
                format(tuned_trt_shape_file))
            config.collect_shape_range_info(tuned_trt_shape_file)
        config.enable_tensorrt_engine(
            # 32 MB of TRT workspace per image in the batch.
            workspace_size=(1 << 25) * batch_size,
            max_batch_size=batch_size,
            min_subgraph_size=yml_conf['min_subgraph_size'],
            precision_mode=precision_map[run_mode],
            use_static=True,
            use_calib_mode=False)

        if yml_conf['use_dynamic_shape']:
            if arch in TUNED_TRT_DYNAMIC_MODELS and os.path.exists(
                    tuned_trt_shape_file):
                # Reuse previously collected shape ranges.
                # NOTE(review): the True flag presumably allows building at
                # runtime for uncovered shapes — confirm in the API docs.
                config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file,
                                                           True)
            else:
                # Hand-specified bounds for the two feed tensors.
                # NOTE(review): assumes inputs named 'image' and
                # 'scale_factor' — verify against the exported model.
                min_input_shape = {
                    'image': [batch_size, 3, 640, 640],
                    'scale_factor': [batch_size, 2]
                }
                max_input_shape = {
                    'image': [batch_size, 3, 1280, 1280],
                    'scale_factor': [batch_size, 2]
                }
                opt_input_shape = {
                    'image': [batch_size, 3, 1024, 1024],
                    'scale_factor': [batch_size, 2]
                }
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    # Silence glog output during benchmarking.
    config.disable_glog_info()
    config.enable_memory_optim()
    # Disable feed/fetch ops so tensors can be set via copy_from_cpu.
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, yml_conf
|
|
|
|
def create_preprocess_ops(yml_conf):
    """Instantiate the preprocess operators listed in the model config.

    Each entry of yml_conf['Preprocess'] is a dict with a 'type' key naming
    an operator class defined in this module (e.g. Resize, Permute); the
    remaining keys are passed as constructor kwargs.

    Returns:
        list of operator instances, in config order.
    """
    preprocess_ops = []
    for op_info in yml_conf['Preprocess']:
        new_op_info = op_info.copy()
        op_type = new_op_info.pop('type')
        # Resolve the class by name in this module's namespace instead of
        # eval(): the type string comes from a config file, and eval() would
        # execute arbitrary expressions placed there. A plain name lookup
        # resolves exactly the same identifiers.
        preprocess_ops.append(globals()[op_type](**new_op_info))
    return preprocess_ops
|
|
|
|
def get_test_images(image_dir):
    """Collect unique image paths from a directory.

    Matches jpg/jpeg/png/bmp files in both lower- and upper-case extension
    spellings; duplicates are removed via a set.

    Returns:
        list of absolute file paths (unordered).
    """
    infer_dir = os.path.abspath(image_dir)
    extensions = ['jpg', 'jpeg', 'png', 'bmp']
    extensions = extensions + [ext.upper() for ext in extensions]
    found = set()
    for ext in extensions:
        found.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
    return list(found)
|
|
|
|
def create_inputs(image_files, preprocess_ops):
    """Preprocess a batch of images and stack them into feed tensors.

    Args:
        image_files: iterable of image paths (or ndarrays) for one batch.
        preprocess_ops: operator pipeline passed to preprocess().

    Returns:
        dict with float32 arrays 'im_shape', 'scale_factor' and 'image',
        each stacked along a new leading batch axis.
    """
    processed, infos = [], []
    for item in image_files:
        data, info = preprocess(item, preprocess_ops)
        processed.append(data)
        infos.append(info)

    inputs = dict()
    inputs['im_shape'] = np.stack(
        [info['im_shape'] for info in infos], axis=0).astype('float32')
    inputs['scale_factor'] = np.stack(
        [info['scale_factor'] for info in infos], axis=0).astype('float32')
    inputs['image'] = np.stack(processed, axis=0).astype('float32')
    return inputs
|
|
|
|
def measure_speed(FLAGS):
    """Benchmark predictor throughput over images in FLAGS.image_dir.

    Feeds total_iter images in batches of batch_size, excluding the first
    warmup_iter images from the accumulated time, and prints a running FPS
    every log_iter images plus an overall summary at the end.
    """
    predictor, yml_conf = init_predictor(FLAGS)
    input_names = predictor.get_input_names()
    preprocess_ops = create_preprocess_ops(yml_conf)

    image_files = get_test_images(FLAGS.image_dir)

    batch_size = FLAGS.batch_size
    warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter

    total_time = 0
    fps = 0
    # i counts images (it advances by batch_size), so warmup_iter, log_iter
    # and total_iter are all compared in image units.
    # NOTE(review): with batch_size > 1, (i + 1) == total_iter below only
    # holds when total_iter % batch_size == 1, so the final summary may be
    # skipped — confirm intended for multi-batch runs.
    for i in range(0, total_iter, batch_size):
        inputs = create_inputs(image_files[i:i + batch_size], preprocess_ops)
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(inputs[name])

        # Start timing from an idle GPU so input copies aren't counted.
        paddle.device.cuda.synchronize()
        start_time = time.perf_counter()
        predictor.run()
        # Wait for inference kernels to finish before stopping the clock.
        paddle.device.cuda.synchronize()

        if i >= warmup_iter:
            total_time += time.perf_counter() - start_time
            if (i + 1) % log_iter == 0:
                fps = (i + 1 - warmup_iter) / total_time
                print(
                    f'Done image [{i + 1:<3}/ {total_iter}], '
                    f'fps: {fps:.1f} img / s, '
                    f'times per image: {1000 / fps:.1f} ms / img',
                    flush=True)

        if (i + 1) == total_iter:
            fps = (i + 1 - warmup_iter) / total_time
            print(
                f'Overall fps: {fps:.1f} img / s, '
                f'times per image: {1000 / fps:.1f} ms / img',
                flush=True)
            break
|
|
|
|
if __name__ == '__main__':
    FLAGS = parse_args()
    # TensorRT run modes require a develop paddle build and TRT >= 8.2;
    # the plain 'paddle' mode only needs a 2.4+ release.
    trt_requested = 'trt' in FLAGS.run_mode
    if trt_requested:
        check_version('develop')
        check_trt_version('8.2')
    else:
        check_version('2.4')
    measure_speed(FLAGS)
|
|