| | |
| | import copy |
| | import logging |
| | import numpy as np |
| | from typing import List, Optional, Union |
| | import torch |
| |
|
| | from detectron2.config import configurable |
| |
|
| | from . import detection_utils as utils |
| | from . import transforms as T |
| |
|
| | """ |
| | This file contains the default mapping that's applied to "dataset dicts". |
| | """ |
| |
|
| | __all__ = ["DatasetMapper"] |
| |
|
| |
|
| | class DatasetMapper: |
| | """ |
| | A callable which takes a dataset dict in Detectron2 Dataset format, |
| | and map it into a format used by the model. |
| | |
| | This is the default callable to be used to map your dataset dict into training data. |
| | You may need to follow it to implement your own one for customized logic, |
| | such as a different way to read or transform images. |
| | See :doc:`/tutorials/data_loading` for details. |
| | |
| | The callable currently does the following: |
| | |
| | 1. Read the image from "file_name" |
| | 2. Applies cropping/geometric transforms to the image and annotations |
| | 3. Prepare data and annotations to Tensor and :class:`Instances` |
| | """ |
| |
|
| | @configurable |
| | def __init__( |
| | self, |
| | is_train: bool, |
| | *, |
| | augmentations: List[Union[T.Augmentation, T.Transform]], |
| | image_format: str, |
| | use_instance_mask: bool = False, |
| | use_keypoint: bool = False, |
| | instance_mask_format: str = "polygon", |
| | keypoint_hflip_indices: Optional[np.ndarray] = None, |
| | precomputed_proposal_topk: Optional[int] = None, |
| | recompute_boxes: bool = False, |
| | ): |
| | """ |
| | NOTE: this interface is experimental. |
| | |
| | Args: |
| | is_train: whether it's used in training or inference |
| | augmentations: a list of augmentations or deterministic transforms to apply |
| | image_format: an image format supported by :func:`detection_utils.read_image`. |
| | use_instance_mask: whether to process instance segmentation annotations, if available |
| | use_keypoint: whether to process keypoint annotations if available |
| | instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation |
| | masks into this format. |
| | keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` |
| | precomputed_proposal_topk: if given, will load pre-computed |
| | proposals from dataset_dict and keep the top k proposals for each image. |
| | recompute_boxes: whether to overwrite bounding box annotations |
| | by computing tight bounding boxes from instance mask annotations. |
| | """ |
| | if recompute_boxes: |
| | assert use_instance_mask, "recompute_boxes requires instance masks" |
| | |
| | self.is_train = is_train |
| | self.augmentations = T.AugmentationList(augmentations) |
| | self.image_format = image_format |
| | self.use_instance_mask = use_instance_mask |
| | self.instance_mask_format = instance_mask_format |
| | self.use_keypoint = use_keypoint |
| | self.keypoint_hflip_indices = keypoint_hflip_indices |
| | self.proposal_topk = precomputed_proposal_topk |
| | self.recompute_boxes = recompute_boxes |
| | |
| | logger = logging.getLogger(__name__) |
| | mode = "training" if is_train else "inference" |
| | logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") |
| |
|
| | @classmethod |
| | def from_config(cls, cfg, is_train: bool = True): |
| | augs = utils.build_augmentation(cfg, is_train) |
| | if cfg.INPUT.CROP.ENABLED and is_train: |
| | augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) |
| | recompute_boxes = cfg.MODEL.MASK_ON |
| | else: |
| | recompute_boxes = False |
| |
|
| | ret = { |
| | "is_train": is_train, |
| | "augmentations": augs, |
| | "image_format": cfg.INPUT.FORMAT, |
| | "use_instance_mask": cfg.MODEL.MASK_ON, |
| | "instance_mask_format": cfg.INPUT.MASK_FORMAT, |
| | "use_keypoint": cfg.MODEL.KEYPOINT_ON, |
| | "recompute_boxes": recompute_boxes, |
| | } |
| |
|
| | if cfg.MODEL.KEYPOINT_ON: |
| | ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) |
| |
|
| | if cfg.MODEL.LOAD_PROPOSALS: |
| | ret["precomputed_proposal_topk"] = ( |
| | cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN |
| | if is_train |
| | else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST |
| | ) |
| | return ret |
| |
|
| | def _transform_annotations(self, dataset_dict, transforms, image_shape): |
| | |
| | for anno in dataset_dict["annotations"]: |
| | if not self.use_instance_mask: |
| | anno.pop("segmentation", None) |
| | if not self.use_keypoint: |
| | anno.pop("keypoints", None) |
| |
|
| | |
| | annos = [ |
| | utils.transform_instance_annotations( |
| | obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices |
| | ) |
| | for obj in dataset_dict.pop("annotations") |
| | if obj.get("iscrowd", 0) == 0 |
| | ] |
| | instances = utils.annotations_to_instances( |
| | annos, image_shape, mask_format=self.instance_mask_format |
| | ) |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if self.recompute_boxes: |
| | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() |
| | dataset_dict["instances"] = utils.filter_empty_instances(instances) |
| |
|
| | def __call__(self, dataset_dict): |
| | """ |
| | Args: |
| | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. |
| | |
| | Returns: |
| | dict: a format that builtin models in detectron2 accept |
| | """ |
| | dataset_dict = copy.deepcopy(dataset_dict) |
| | |
| | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) |
| | utils.check_image_size(dataset_dict, image) |
| |
|
| | |
| | if "sem_seg_file_name" in dataset_dict: |
| | sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) |
| | else: |
| | sem_seg_gt = None |
| |
|
| | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) |
| | transforms = self.augmentations(aug_input) |
| | image, sem_seg_gt = aug_input.image, aug_input.sem_seg |
| |
|
| | image_shape = image.shape[:2] |
| | |
| | |
| | |
| | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) |
| | if sem_seg_gt is not None: |
| | dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) |
| |
|
| | |
| | |
| | if self.proposal_topk is not None: |
| | utils.transform_proposals( |
| | dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk |
| | ) |
| |
|
| | if not self.is_train: |
| | |
| | dataset_dict.pop("annotations", None) |
| | dataset_dict.pop("sem_seg_file_name", None) |
| | return dataset_dict |
| |
|
| | if "annotations" in dataset_dict: |
| | self._transform_annotations(dataset_dict, transforms, image_shape) |
| |
|
| | return dataset_dict |
| |
|