| | |
| | """pytest tests/test_forward.py.""" |
| | import copy |
| | from os.path import dirname, exists, join |
| |
|
| | import numpy as np |
| | import pytest |
| | import torch |
| |
|
| |
|
def _get_config_directory():
    """Find the predefined detector config directory.

    Returns:
        str: path of the ``configs`` directory located next to the
            repository (or installed package) root.

    Raises:
        FileNotFoundError: if the resolved ``configs`` directory does not
            exist. The message includes the path that was probed.
    """
    try:
        # Repository root is taken to be three levels up from this file.
        repo_dpath = dirname(dirname(dirname(__file__)))
    except NameError:
        # ``__file__`` is not defined (e.g. an interactive session), so
        # locate the root relative to the importable mmdet package instead.
        import mmdet
        repo_dpath = dirname(dirname(mmdet.__file__))
    config_dpath = join(repo_dpath, 'configs')
    if not exists(config_dpath):
        # FileNotFoundError is still an Exception, so existing handlers
        # keep working, but the failure now names the missing directory.
        raise FileNotFoundError(
            'Cannot find config path: {}'.format(config_dpath))
    return config_dpath
| |
|
| |
|
def _get_config_module(fname):
    """Load a configuration file as an mmcv ``Config`` object.

    Args:
        fname (str): config file path relative to the config directory.

    Returns:
        The object produced by ``Config.fromfile`` for that file.
    """
    from mmcv import Config
    return Config.fromfile(join(_get_config_directory(), fname))
| |
|
| |
|
def _get_detector_cfg(fname):
    """Return an independent copy of the ``model`` section of a config.

    The section is deep copied so a test may freely modify it without
    affecting the configuration seen by other tests.

    Args:
        fname (str): config file path relative to the config directory.
    """
    return copy.deepcopy(_get_config_module(fname).model)
| |
|
| |
|
def _replace_r50_with_r18(model):
    """Return a copy of ``model`` with a ResNet backbone shrunk to depth 18.

    The input is never modified. Configs whose backbone type is not
    ``'ResNet'`` are returned as an unchanged copy.
    """
    shrunk = copy.deepcopy(model)
    backbone = shrunk.backbone
    if backbone.type != 'ResNet':
        return shrunk
    backbone.depth = 18
    backbone.base_channels = 2
    # Neck input widths are set together with the reduced base_channels.
    shrunk.neck.in_channels = [2, 4, 8, 16]
    return shrunk
| |
|
| |
|
def test_sparse_rcnn_forward():
    """Run Sparse R-CNN through train, dummy and test forward passes."""
    from mmdet.models import build_detector

    cfg = _replace_r50_with_r18(
        _get_detector_cfg('sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py'))
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)
    detector.init_weights()
    input_shape = (1, 3, 100, 100)

    # Training forward with five ground-truth boxes.
    batch = _demo_mm_inputs(input_shape, num_items=[5])
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')
    detector.train()
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=list(batch['gt_bboxes']),
        gt_labels=list(batch['gt_labels']),
        return_loss=True)
    assert isinstance(losses, dict)
    total_loss, _ = detector._parse_losses(losses)
    assert float(total_loss.item()) > 0
    detector.forward_dummy(imgs)

    # Training forward with an empty ground-truth batch.
    batch = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=list(batch['gt_bboxes']),
        gt_labels=list(batch['gt_labels']),
        return_loss=True)
    assert isinstance(losses, dict)
    total_loss, _ = detector._parse_losses(losses)
    assert float(total_loss.item()) > 0

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             rescale=True,
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]

    # RoI head called directly with an empty (1, 0, 4) tensor as its
    # second argument.
    with torch.no_grad():
        first_img = imgs[0][None, :]
        detector.roi_head.simple_test(
            [first_img], torch.empty((1, 0, 4)), torch.empty((1, 100, 4)),
            [img_metas[0]], torch.ones((1, 4)))
| |
|
| |
|
def test_rpn_forward():
    """Run the RPN detector through one train and one test forward pass."""
    from mmdet.models import build_detector

    cfg = _replace_r50_with_r18(
        _get_detector_cfg('rpn/rpn_r50_fpn_1x_coco.py'))
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)

    batch = _demo_mm_inputs((1, 3, 100, 100))
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')

    # Training forward: only boxes are supplied as ground truth.
    losses = detector.forward(
        imgs, img_metas, gt_bboxes=batch['gt_bboxes'], return_loss=True)
    assert isinstance(losses, dict)

    # Inference, one image at a time. Note that the detector is not
    # switched to eval mode before this pass.
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]
| |
|
| |
|
@pytest.mark.parametrize(
    'cfg_file',
    [
        'reppoints/reppoints_moment_r50_fpn_1x_coco.py',
        'retinanet/retinanet_r50_fpn_1x_coco.py',
        'guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py',
        'ghm/retinanet_ghm_r50_fpn_1x_coco.py',
        'fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py',
        'foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',
        'yolo/yolov3_mobilenetv2_320_300e_coco.py',
        'yolox/yolox_tiny_8x8_300e_coco.py'
    ])
def test_single_stage_forward_gpu(cfg_file):
    """Run train and test forward passes of single-stage detectors on GPU.

    The test is skipped when CUDA is not available.
    """
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    from mmdet.models import build_detector

    cfg = _replace_r50_with_r18(_get_detector_cfg(cfg_file))
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)

    batch = _demo_mm_inputs((2, 3, 128, 128))
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')

    # Move the model and every tensor input onto the GPU.
    detector = detector.cuda()
    imgs = imgs.cuda()
    cuda_bboxes = [boxes.cuda() for boxes in batch['gt_bboxes']]
    cuda_labels = [labels.cuda() for labels in batch['gt_labels']]

    # Training forward.
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=cuda_bboxes,
        gt_labels=cuda_labels,
        return_loss=True)
    assert isinstance(losses, dict)

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]
| |
|
| |
|
def test_faster_rcnn_ohem_forward():
    """Run Faster R-CNN (OHEM config) training forwards and an RoI pass."""
    from mmdet.models import build_detector

    cfg = _replace_r50_with_r18(
        _get_detector_cfg(
            'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py'))
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)

    input_shape = (1, 3, 100, 100)

    # Training forward, first with ten ground-truth boxes and then with
    # none. The variables of the last (empty) round are reused below.
    for num_boxes in (10, 0):
        batch = _demo_mm_inputs(input_shape, num_items=[num_boxes])
        imgs = batch.pop('imgs')
        img_metas = batch.pop('img_metas')
        gt_bboxes = batch['gt_bboxes']
        gt_labels = batch['gt_labels']
        losses = detector.forward(
            imgs,
            img_metas,
            gt_bboxes=gt_bboxes,
            gt_labels=gt_labels,
            return_loss=True)
        assert isinstance(losses, dict)
        total_loss, _ = detector._parse_losses(losses)
        assert float(total_loss.item()) > 0

    # RoI head training forward with an empty (0, 5) proposal tensor.
    feature = detector.extract_feat(imgs[0][None, :])
    empty_proposals = [torch.empty((0, 5))]
    losses = detector.roi_head.forward_train(
        feature,
        img_metas,
        empty_proposals,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels)
    assert isinstance(losses, dict)
| |
|
| |
|
@pytest.mark.parametrize(
    'cfg_file',
    [
        'mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py',
    ])
def test_two_stage_forward(cfg_file):
    """Run train/test forwards of two-stage detectors, incl. empty inputs.

    Covers: training with and without ground truth, RoI-head training with
    empty proposals, per-image inference, and several inference entry
    points fed with empty or partially empty proposal lists.
    """
    from mmdet.models import build_detector

    # Configs that additionally receive a semantic segmentation map.
    with_semantic = cfg_file in (
        'htc/htc_r50_fpn_1x_coco.py',
        'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    )
    # Configs for which the direct ``simple_test_bboxes`` checks are skipped.
    cascade_models = (
        'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
        'htc/htc_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    )
    seesaw_cfgs = (
        'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py',  # noqa: E501
    )

    model = _replace_r50_with_r18(_get_detector_cfg(cfg_file))
    model.backbone.init_cfg = None

    if cfg_file in seesaw_cfgs:
        # Override the class count and test thresholds for this config.
        roi_head = model.roi_head
        roi_head.bbox_head.num_classes = 80
        roi_head.bbox_head.loss_cls.num_classes = 80
        roi_head.mask_head.num_classes = 80
        model.test_cfg.rcnn.score_thr = 0.05
        model.test_cfg.rcnn.max_per_img = 100

    detector = build_detector(model)

    input_shape = (1, 3, 128, 128)

    # Training forward/backward, first with ten ground-truth boxes and then
    # with none. The variables of the last (empty) round are reused below.
    for num_boxes in (10, 0):
        mm_inputs = _demo_mm_inputs(
            input_shape, num_items=[num_boxes], with_semantic=with_semantic)
        imgs = mm_inputs.pop('imgs')
        img_metas = mm_inputs.pop('img_metas')
        losses = detector.forward(
            imgs, img_metas, return_loss=True, **mm_inputs)
        assert isinstance(losses, dict)
        loss, _ = detector._parse_losses(losses)
        loss.requires_grad_(True)
        assert float(loss.item()) > 0
        loss.backward()

    # RoI head training forward with an empty (0, 5) proposal tensor.
    if cfg_file in ('panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py', ):
        mm_inputs.pop('gt_semantic_seg')

    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(
        feature, img_metas, [torch.empty((0, 5))], **mm_inputs)
    assert isinstance(losses, dict)

    # Inference, one image at a time.
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]

    with torch.no_grad():
        # No proposals for the single image in the batch.
        detector.simple_test(
            imgs[0][None, :], [img_metas[0]], proposals=[torch.empty((0, 4))])

        # No proposals for either of the two augmented views.
        features = detector.extract_feats([imgs[0][None, :]] * 2)
        detector.roi_head.aug_test(features, [torch.empty((0, 4))] * 2,
                                   [[img_metas[0]]] * 2)

        # Box branch called directly with ``None`` as its last argument.
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :])
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]], [torch.empty((0, 4))], None)
            num_scores = detector.roi_head.bbox_head.fc_cls.out_features
            assert all([bbox.shape == torch.Size((0, 4)) for bbox in bboxes])
            assert all([
                score.shape == torch.Size((0, num_scores))
                for score in scores
            ])

        # Two-image batch where only one image has proposals.
        x1y1 = torch.randint(1, 100, (10, 2)).float()
        # Adding a strictly positive offset keeps x2y2 greater than x1y1.
        x2y2 = x1y1 + torch.randint(1, 100, (10, 2))
        detector.simple_test(
            imgs[0][None, :].repeat(2, 1, 1, 1), [img_metas[0]] * 2,
            proposals=[torch.empty((0, 4)),
                       torch.cat([x1y1, x2y2], dim=-1)])

        # Augmented test where only one view has proposals.
        detector.roi_head.aug_test(
            features, [torch.cat([x1y1, x2y2], dim=-1),
                       torch.empty((0, 4))], [[img_metas[0]]] * 2)

        # Box branch called directly on the partially empty two-image batch.
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :].repeat(
                2, 1, 1, 1))
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]] * 2,
                [torch.empty((0, 4)),
                 torch.cat([x1y1, x2y2], dim=-1)], None)
            assert bboxes[0].shape == torch.Size((0, 4))
            assert scores[0].shape == torch.Size(
                (0, detector.roi_head.bbox_head.fc_cls.out_features))
| |
|
| |
|
@pytest.mark.parametrize(
    'cfg_file', ['ghm/retinanet_ghm_r50_fpn_1x_coco.py', 'ssd/ssd300_coco.py'])
def test_single_stage_forward_cpu(cfg_file):
    """Run train and test forward passes of single-stage detectors on CPU."""
    from mmdet.models import build_detector

    cfg = _replace_r50_with_r18(_get_detector_cfg(cfg_file))
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)

    batch = _demo_mm_inputs((1, 3, 300, 300))
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')

    # Training forward.
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=batch['gt_bboxes'],
        gt_labels=batch['gt_labels'],
        return_loss=True)
    assert isinstance(losses, dict)

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]
| |
|
| |
|
def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=10,
                    with_semantic=False):
    """Create a superset of inputs needed to run test or train batches.

    All random content (image, boxes, labels, masks and the optional
    semantic map) is drawn from a single ``RandomState`` seeded with 0, so
    calls with the same arguments produce the same batch.

    Args:
        input_shape (tuple):
            input batch dimensions ``(N, C, H, W)``

        num_items (None | List[int]):
            specifies the number of boxes in each batch item; when None,
            every item gets a random count drawn from ``[1, 10)``

        num_classes (int):
            number of different labels a box might have

        with_semantic (bool):
            whether to add a ``gt_semantic_seg`` entry of shape
            ``(N, 1, H // 8, W // 8)``

    Returns:
        dict: keys ``imgs``, ``img_metas``, ``gt_bboxes``, ``gt_labels``,
            ``gt_bboxes_ignore`` (always None), ``gt_masks`` and, when
            ``with_semantic`` is set, ``gt_semantic_seg``.
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape

    rng = np.random.RandomState(0)

    imgs = rng.rand(*input_shape)

    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),
        'flip': False,
        'flip_direction': None,
    } for _ in range(N)]

    gt_bboxes = []
    gt_labels = []
    gt_masks = []

    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]

        # Random centre and relative size per box, each drawn from [0, 1).
        cx, cy, bw, bh = rng.rand(num_boxes, 4).T

        # Convert to corner coordinates and clip them to the image extent.
        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)

        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = rng.randint(1, num_classes, size=num_boxes)

        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))

        # One random binary mask per box, generated for every batch item
        # from the seeded generator so the batch stays reproducible.
        mask = rng.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
    }

    if with_semantic:
        # The semantic map is 1/8 of the image size, with one map per
        # batch item (batch dimension N rather than a hard-coded 1).
        gt_semantic_seg = rng.randint(
            0, num_classes, (N, 1, H // 8, W // 8), dtype=np.uint8)
        mm_inputs.update(
            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})

    return mm_inputs
| |
|
| |
|
def test_yolact_forward():
    """Run YOLACT through train, dummy and test forward passes."""
    from mmdet.models import build_detector

    cfg = _replace_r50_with_r18(
        _get_detector_cfg('yolact/yolact_r50_1x8_coco.py'))
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)

    batch = _demo_mm_inputs((1, 3, 100, 100))
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')

    # Training forward; masks are passed in addition to boxes and labels.
    detector.train()
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=batch['gt_bboxes'],
        gt_labels=batch['gt_labels'],
        gt_masks=batch['gt_masks'],
        return_loss=True)
    assert isinstance(losses, dict)

    # Dummy forward.
    detector.forward_dummy(imgs)

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             rescale=True,
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]
| |
|
| |
|
def test_detr_forward():
    """Run DETR through train (with/without truth) and test forwards."""
    from mmdet.models import build_detector

    cfg = _get_detector_cfg('detr/detr_r50_8x2_150e_coco.py')
    # Shrink the backbone and match the head's input width to it.
    cfg.backbone.depth = 18
    cfg.bbox_head.in_channels = 512
    cfg.backbone.init_cfg = None
    detector = build_detector(cfg)

    input_shape = (1, 3, 100, 100)

    # Training forward, first with a random number of boxes (num_items
    # left at None) and then with an empty ground-truth batch.
    detector.train()
    for items in (None, [0]):
        batch = _demo_mm_inputs(input_shape, num_items=items)
        imgs = batch.pop('imgs')
        img_metas = batch.pop('img_metas')
        losses = detector.forward(
            imgs,
            img_metas,
            gt_bboxes=batch['gt_bboxes'],
            gt_labels=batch['gt_labels'],
            return_loss=True)
        assert isinstance(losses, dict)
        total_loss, _ = detector._parse_losses(losses)
        assert float(total_loss.item()) > 0

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             rescale=True,
                             return_loss=False)
            for one_img, one_meta in zip(imgs, img_metas)
        ]
| |
|
| |
|
def test_inference_detector():
    """Check ``inference_detector`` on a single image and on a list."""
    from mmcv import ConfigDict

    from mmdet.apis import inference_detector
    from mmdet.models import build_detector

    # Small RetinaNet (ResNet-18 backbone, no neck) with three classes.
    num_class = 3
    backbone_cfg = dict(
        type='ResNet',
        depth=18,
        num_stages=4,
        out_indices=(3, ),
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch')
    head_cfg = dict(
        type='RetinaHead',
        num_classes=num_class,
        in_channels=512,
        stacked_convs=1,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5],
            strides=[32]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
    )
    test_cfg = dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.5),
        max_per_img=100)
    model_dict = dict(
        type='RetinaNet',
        backbone=backbone_cfg,
        neck=None,
        bbox_head=head_cfg,
        test_cfg=test_cfg)

    rng = np.random.RandomState(0)
    img1 = rng.rand(100, 100, 3)
    img2 = rng.rand(100, 100, 3)

    model = build_detector(ConfigDict(model_dict))
    # Attach a full config object to the model before running inference.
    model.cfg = _get_config_module('retinanet/retinanet_r50_fpn_1x_coco.py')

    # Single image: one result entry per class.
    single = inference_detector(model, img1)
    assert len(single) == num_class

    # List of images: one result per image, each with one entry per class.
    multi = inference_detector(model, [img1, img2])
    assert len(multi) == 2 and len(multi[0]) == num_class
| |
|
| |
|
def test_yolox_random_size():
    """Check the ``_input_size`` YOLOX reports after one training forward."""
    from mmdet.models import build_detector

    cfg = _get_detector_cfg('yolox/yolox_tiny_8x8_300e_coco.py')
    cfg.random_size_range = (2, 2)
    cfg.input_size = (64, 96)
    cfg.random_size_interval = 1
    detector = build_detector(cfg)

    batch = _demo_mm_inputs((1, 3, 64, 64))
    imgs = batch.pop('imgs')
    img_metas = batch.pop('img_metas')

    # One training forward; only its effect on ``_input_size`` is checked.
    detector.train()
    detector.forward(
        imgs,
        img_metas,
        gt_bboxes=batch['gt_bboxes'],
        gt_labels=batch['gt_labels'],
        return_loss=True)
    assert detector._input_size == (64, 96)
| |
|
| |
|
def test_maskformer_forward():
    """Run a shrunk MaskFormer through train and test forward passes."""
    from mmdet.core import BitmapMasks
    from mmdet.models import build_detector

    model_cfg = _get_detector_cfg(
        'maskformer/maskformer_r50_mstrain_16x1_75e_coco.py')

    # Shrink the backbone and every channel width that depends on it.
    base_channels = 32
    backbone = model_cfg.backbone
    backbone.depth = 18
    backbone.init_cfg = None
    backbone.base_channels = base_channels

    head = model_cfg.panoptic_head
    head.in_channels = [base_channels * 2**i for i in range(4)]
    head.feat_channels = base_channels
    head.out_channels = base_channels

    encoder_layers = head.pixel_decoder.encoder.transformerlayers
    encoder_layers.attn_cfgs.embed_dims = base_channels
    encoder_layers.ffn_cfgs.embed_dims = base_channels
    encoder_layers.ffn_cfgs.feedforward_channels = base_channels * 8

    head.pixel_decoder.positional_encoding.num_feats = base_channels // 2
    head.positional_encoding.num_feats = base_channels // 2

    decoder_layers = head.transformer_decoder.transformerlayers
    decoder_layers.attn_cfgs.embed_dims = base_channels
    decoder_layers.ffn_cfgs.embed_dims = base_channels
    decoder_layers.ffn_cfgs.feedforward_channels = base_channels * 8
    decoder_layers.feedforward_channels = base_channels * 8

    detector = build_detector(model_cfg)

    # Training forward with one labelled mask and a semantic map.
    detector.train()
    img_metas = [
        {
            'batch_input_shape': (128, 160),
            'img_shape': (126, 160, 3),
            'ori_shape': (63, 80, 3),
            'pad_shape': (128, 160, 3)
        },
    ]
    img = torch.rand((1, 3, 128, 160))

    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)
    thing_mask1[0, :50] = 1
    stuff_mask1 = torch.zeros((1, 128, 160)).long()
    stuff_mask1[0, :50] = 10
    stuff_mask1[0, 50:] = 100

    losses = detector.forward(
        img=img,
        img_metas=img_metas,
        gt_bboxes=None,
        gt_labels=[torch.tensor([10]).long()],
        gt_masks=[BitmapMasks(thing_mask1, 128, 160)],
        gt_semantic_seg=[stuff_mask1],
        return_loss=True)
    assert isinstance(losses, dict)
    total_loss, _ = detector._parse_losses(losses)
    assert float(total_loss.item()) > 0

    # Training forward with empty boxes, labels, masks and semantic map.
    empty_mask = np.zeros((0, 128, 160), dtype=np.uint8)
    losses = detector.forward(
        img,
        img_metas,
        gt_bboxes=[torch.empty((0, 4)).float()],
        gt_labels=[torch.empty((0, )).long()],
        gt_masks=[BitmapMasks(empty_mask, 128, 160)],
        gt_semantic_seg=[torch.randint(0, 133, (0, 128, 160))],
        return_loss=True)
    assert isinstance(losses, dict)
    total_loss, _ = detector._parse_losses(losses)
    assert float(total_loss.item()) > 0

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = [
            detector.forward([one_img[None, :]], [[one_meta]],
                             rescale=True,
                             return_loss=False)
            for one_img, one_meta in zip(img, img_metas)
        ]
| |
|
| |
|
@pytest.mark.parametrize('cfg_file', [
    'mask2former/mask2former_r50_lsj_8x2_50e_coco.py',
    'mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py'
])
def test_mask2former_forward(cfg_file):
    """Run a shrunk Mask2Former through train and test forward passes.

    Training is exercised with and without a semantic map, for both a
    labelled and an empty ground-truth batch. At inference the result type
    is checked against the head's ``num_stuff_classes``.
    """
    from mmdet.core import BitmapMasks
    from mmdet.models import build_detector

    model_cfg = _get_detector_cfg(cfg_file)

    # Shrink the backbone and every channel width that depends on it.
    base_channels = 32
    backbone = model_cfg.backbone
    backbone.depth = 18
    backbone.init_cfg = None
    backbone.base_channels = base_channels

    head = model_cfg.panoptic_head
    head.in_channels = [base_channels * 2**i for i in range(4)]
    head.feat_channels = base_channels
    head.out_channels = base_channels

    encoder_layers = head.pixel_decoder.encoder.transformerlayers
    encoder_layers.attn_cfgs.embed_dims = base_channels
    encoder_layers.ffn_cfgs.embed_dims = base_channels
    encoder_layers.ffn_cfgs.feedforward_channels = base_channels * 4

    head.pixel_decoder.positional_encoding.num_feats = base_channels // 2
    head.positional_encoding.num_feats = base_channels // 2

    decoder_layers = head.transformer_decoder.transformerlayers
    decoder_layers.attn_cfgs.embed_dims = base_channels
    decoder_layers.ffn_cfgs.embed_dims = base_channels
    decoder_layers.ffn_cfgs.feedforward_channels = base_channels * 8
    decoder_layers.feedforward_channels = base_channels * 8

    num_stuff_classes = model_cfg.panoptic_head.num_stuff_classes

    detector = build_detector(model_cfg)

    img_metas = [
        {
            'batch_input_shape': (128, 160),
            'img_shape': (126, 160, 3),
            'ori_shape': (63, 80, 3),
            'pad_shape': (128, 160, 3)
        },
    ]

    def _check_train(bboxes, labels, masks, semantic_seg):
        """Run one training forward and require a positive total loss."""
        losses = detector.forward(
            img,
            img_metas,
            gt_bboxes=bboxes,
            gt_labels=labels,
            gt_masks=masks,
            gt_semantic_seg=semantic_seg,
            return_loss=True)
        assert isinstance(losses, dict)
        total_loss, _ = detector._parse_losses(losses)
        assert float(total_loss.item()) > 0

    detector.train()
    img = torch.rand((1, 3, 128, 160))

    # Labelled batch: one mask, first with and then without a semantic map.
    gt_labels = [torch.tensor([10]).long()]
    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)
    thing_mask1[0, :50] = 1
    gt_masks = [BitmapMasks(thing_mask1, 128, 160)]
    stuff_mask1 = torch.zeros((1, 128, 160)).long()
    stuff_mask1[0, :50] = 10
    stuff_mask1[0, 50:] = 100
    _check_train(None, gt_labels, gt_masks, [stuff_mask1])
    _check_train(None, gt_labels, gt_masks, None)

    # Empty batch: first with an empty semantic map and then without one.
    gt_bboxes = [torch.empty((0, 4)).float()]
    gt_labels = [torch.empty((0, )).long()]
    empty_mask = np.zeros((0, 128, 160), dtype=np.uint8)
    gt_masks = [BitmapMasks(empty_mask, 128, 160)]
    empty_semantic = [torch.randint(0, 133, (0, 128, 160))]
    _check_train(gt_bboxes, gt_labels, gt_masks, empty_semantic)
    _check_train(gt_bboxes, gt_labels, gt_masks, None)

    # Inference, one image at a time.
    detector.eval()
    with torch.no_grad():
        batch_results = []
        for one_img, one_meta in zip(img, img_metas):
            result = detector.forward([one_img[None, :]], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)

            # A dict is expected when the head has stuff classes,
            # otherwise a tuple.
            expected_type = dict if num_stuff_classes > 0 else tuple
            assert isinstance(result[0], expected_type)

            batch_results.append(result)
| |
|