| | |
| | from typing import Optional, Tuple, Union |
| |
|
| | import mmcv |
| | import numpy as np |
| | import pycocotools.mask as maskUtils |
| | import torch |
| | from mmcv.transforms import BaseTransform |
| | from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations |
| | from mmcv.transforms import LoadImageFromFile |
| | from mmengine.fileio import get |
| | from mmengine.structures import BaseDataElement |
| |
|
| | from mmdet.registry import TRANSFORMS |
| | from mmdet.structures.bbox import get_box_type |
| | from mmdet.structures.bbox.box_type import autocast_box_type |
| | from mmdet.structures.mask import BitmapMasks, PolygonMasks |
| |
|
| |
|
@TRANSFORMS.register_module()
class LoadAnnotations(MMCV_LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Category label of the instance.
                'bbox_label': 1,

                # Used in instance/panoptic segmentation. The segmentation
                # mask of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                #    one for each connected component of the object. Each
                #    list[float] is one simple polygon in the format of
                #    [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are
                #    absolute coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask
                #    in COCO's compressed RLE format. The dict should have
                #    keys "size" and "counts". Can be loaded by pycocotools.
                'mask': list[list[float]] or dict,

                # Truthy when the instance should be ignored.
                'ignore_flag': 0,
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image. A plain np.ndarray (N, 4) when ``box_type`` is
            # None.
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In bool type.
            'gt_ignore_flags': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
        }

    Required Keys:

    - height
    - width
    - ori_shape (read when ``with_mask`` is True)
    - instances

      - bbox (optional)
      - bbox_label
      - mask (optional)
      - ignore_flag

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32], or np.float32 array when
      ``box_type`` is None)
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to False.
        poly2mask (bool): Whether to convert mask to bitmap.
            Defaults to True.
        box_type (str, optional): The box type used to wrap the bboxes. If
            ``box_type`` is None, gt_bboxes will keep being np.ndarray.
            Defaults to 'hbox'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:`mmcv.imfrombytes`.
            See :func:`mmcv.imfrombytes` for details.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Note:
        ``with_bbox``, ``with_label``, ``with_seg``, ``imdecode_backend`` and
        ``backend_args`` are not handled here; they are forwarded to the
        parent class through ``**kwargs``.
    """

    def __init__(self,
                 with_mask: bool = False,
                 poly2mask: bool = True,
                 box_type: Optional[str] = 'hbox',
                 **kwargs) -> None:
        # Everything not consumed here is forwarded untouched to the parent
        # transform.
        super().__init__(**kwargs)
        self.with_mask = with_mask
        self.poly2mask = poly2mask
        self.box_type = box_type

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Writes ``gt_bboxes`` and ``gt_ignore_flags`` into ``results`` in
        place.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.
        """
        instances = results.get('instances', [])
        gt_bboxes = [instance['bbox'] for instance in instances]
        gt_ignore_flags = [instance['ignore_flag'] for instance in instances]

        if self.box_type is None:
            # The reshape keeps a well-formed (0, 4) array when there are no
            # instances at all.
            results['gt_bboxes'] = np.array(
                gt_bboxes, dtype=np.float32).reshape((-1, 4))
        else:
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(gt_bboxes, dtype=torch.float32)
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)

    def _load_labels(self, results: dict) -> None:
        """Private function to load label annotations.

        Writes ``gt_bboxes_labels`` into ``results`` in place.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.
        """
        results['gt_bboxes_labels'] = np.array(
            [
                instance['bbox_label']
                for instance in results.get('instances', [])
            ],
            dtype=np.int64)

    def _poly2mask(self, mask_ann: Union[list, dict], img_h: int,
                   img_w: int) -> np.ndarray:
        """Private function to convert masks represented with polygon to
        bitmaps.

        Args:
            mask_ann (list | dict): Polygon mask annotation input.
            img_h (int): The height of output mask.
            img_w (int): The width of output mask.

        Returns:
            np.ndarray: The decode bitmap mask of shape (img_h, img_w).
        """
        if isinstance(mask_ann, list):
            # Polygons: one entry per part of the object. Each part is
            # encoded separately and the pieces are merged into a single RLE.
            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
            rle = maskUtils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # RLE whose ``counts`` is still a plain list: let pycocotools
            # encode it first.
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # Anything else is handed to the decoder as-is.
            rle = mask_ann
        return maskUtils.decode(rle)

    def _process_masks(self, results: dict) -> list:
        """Process gt_masks and filter invalid polygons.

        An instance whose mask is considered invalid gets a placeholder
        polygon and has its ``ignore_flag`` set to 1 (the instance dict is
        modified in place). ``results['gt_ignore_flags']`` is rebuilt from
        the resulting flags.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.

        Returns:
            list: Processed gt_masks.
        """
        gt_masks = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_mask = instance['mask']

            if isinstance(gt_mask, list):
                # Keep only polygons with an even number of coordinates and
                # at least three points.
                gt_mask = [
                    np.array(polygon) for polygon in gt_mask
                    if len(polygon) % 2 == 0 and len(polygon) >= 6
                ]
                is_valid = len(gt_mask) > 0
            elif not self.poly2mask:
                # Without bitmap conversion only the polygon (list) format
                # is accepted.
                is_valid = False
            elif isinstance(gt_mask, dict):
                # A dict mask must carry both ``counts`` and ``size``, and
                # ``counts`` must be a list or a str.
                is_valid = (
                    gt_mask.get('counts') is not None
                    and gt_mask.get('size') is not None
                    and isinstance(gt_mask['counts'], (list, str)))
            else:
                # Other types are passed through unchanged.
                is_valid = True

            if not is_valid:
                # Ignore the whole instance and substitute a fake mask so
                # the number of masks still matches the number of instances.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]

            gt_masks.append(gt_mask)
            # Re-collect the flags, since they may just have been changed.
            gt_ignore_flags.append(instance['ignore_flag'])

        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)
        return gt_masks

    def _load_masks(self, results: dict) -> None:
        """Private function to load mask annotations.

        Writes ``gt_masks`` into ``results`` in place (and, through
        ``_process_masks``, refreshes ``gt_ignore_flags``).

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.
        """
        # Only height and width are needed; slicing tolerates an
        # ``ori_shape`` that carries extra trailing entries.
        h, w = results['ori_shape'][:2]
        gt_masks = self._process_masks(results)
        if self.poly2mask:
            gt_masks = BitmapMasks(
                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
        else:
            gt_masks = PolygonMasks(list(gt_masks), h, w)
        results['gt_masks'] = gt_masks

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Each enabled loader writes its keys into ``results`` in place.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.

        Returns:
            dict: The same ``results`` dict, updated by whichever of the
            bbox, label, mask and semantic segmentation loaders are enabled.
        """
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask:
            # Runs after the bbox loader: it rewrites ``gt_ignore_flags``.
            self._load_masks(results)
        if self.with_seg:
            self._load_seg_map(results)
        return results

    def __repr__(self) -> str:
        """Return the class name followed by its main configuration."""
        return (f'{self.__class__.__name__}('
                f'with_bbox={self.with_bbox}, '
                f'with_label={self.with_label}, '
                f'with_mask={self.with_mask}, '
                f'with_seg={self.with_seg}, '
                f'poly2mask={self.poly2mask}, '
                f"imdecode_backend='{self.imdecode_backend}', "
                f'backend_args={self.backend_args})')
| |
|
| |
|
| |
|