# -*- coding: utf-8 -*- # Copyright (c) Facebook, Inc. and its affiliates. """ Common data processing utilities that are used in a typical object detection data pipeline. """ import logging import numpy as np from typing import List, Union import pycocotools.mask as mask_util import torch from PIL import Image from .detectron2.structures import ( Boxes, BoxMode, ) from .detectron2.utils.file_io import PathManager from .detectron2.data2 import transforms as T __all__ = [ "SizeMismatchError", "convert_image_to_rgb", "check_image_size", "transform_proposals", "transform_instance_annotations", "annotations_to_instances", "annotations_to_instances_rotated", "build_augmentation", "build_transform_gen", "create_keypoint_hflip_indices", "filter_empty_instances", "read_image", ] class SizeMismatchError(ValueError): """ When loaded image has difference width/height compared with annotation. """ # https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 _M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] _M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] # https://www.exiv2.org/tags.html _EXIF_ORIENT = 274 # exif 'Orientation' tag def convert_PIL_to_numpy(image, format): """ Convert PIL image to numpy array of target format. Args: image (PIL.Image): a PIL image format (str): the format of output image Returns: (np.ndarray): also see `read_image` """ if format is not None: # PIL only supports RGB, so convert to RGB and flip channels over below conversion_format = format if format in ["BGR", "YUV-BT.601"]: conversion_format = "RGB" image = image.convert(conversion_format) image = np.asarray(image) # PIL squeezes out the channel dimension for "L", so make it HWC if format == "L": image = np.expand_dims(image, -1) # handle formats not supported by PIL elif format == "BGR": # flip channels if needed image = image[:, :, ::-1] elif format == "YUV-BT.601": image = image / 255.0 image = np.dot(image, np.array(_M_RGB2YUV).T) elif format != "RGB": raise ValueError(f"Unsupported image format: {format}") return image def convert_image_to_rgb(image, format): """ Convert an image from given format to RGB. Args: image (np.ndarray or Tensor): an HWC image format (str): the format of input image, also see `read_image` Returns: (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8 """ if isinstance(image, torch.Tensor): image = image.cpu().numpy() if format == "BGR": image = image[:, :, [2, 1, 0]] elif format == "YUV-BT.601": image = np.dot(image, np.array(_M_YUV2RGB).T) image = image * 255.0 else: if format == "L": image = image[:, :, 0] image = image.astype(np.uint8) image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) return image def _apply_exif_orientation(image): """ Applies the exif orientation correctly. This code exists per the bug: https://github.com/python-pillow/Pillow/issues/3973 with the function `ImageOps.exif_transpose`. The Pillow source raises errors with various methods, especially `tobytes` Function based on: https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 Args: image (PIL.Image): a PIL image Returns: (PIL.Image): the PIL image with exif orientation applied, if applicable """ if not hasattr(image, "getexif"): return image try: exif = image.getexif() except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 exif = None if exif is None: return image orientation = exif.get(_EXIF_ORIENT) method = { 2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90, }.get(orientation) if method is not None: return image.transpose(method) return image def read_image(file_name, format=None): """ Read an image into the given format. Will apply rotation and flipping if the image has such exif information. Args: file_name (str): image file path format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". Returns: image (np.ndarray): an HWC image in the given format, which is 0-255, uint8 for supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. """ with PathManager.open(file_name, "rb") as f: image = Image.open(f) # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 image = _apply_exif_orientation(image) return convert_PIL_to_numpy(image, format) def check_image_size(dataset_dict, image): """ Raise an error if the image does not match the size specified in the dict. """ if "width" in dataset_dict or "height" in dataset_dict: image_wh = (image.shape[1], image.shape[0]) expected_wh = (dataset_dict["width"], dataset_dict["height"]) if not image_wh == expected_wh: raise SizeMismatchError( "Mismatched image shape{}, got {}, expect {}.".format( " for image " + dataset_dict["file_name"] if "file_name" in dataset_dict else "", image_wh, expected_wh, ) + " Please check the width/height in your annotation." ) # To ensure bbox always remap to original image size if "width" not in dataset_dict: dataset_dict["width"] = image.shape[1] if "height" not in dataset_dict: dataset_dict["height"] = image.shape[0] def transform_instance_annotations( annotation, transforms, image_size, *, keypoint_hflip_indices=None ): """ Apply transforms to box, segmentation and keypoints annotations of a single instance. It will use `transforms.apply_box` for the box, and `transforms.apply_coords` for segmentation polygons & keypoints. If you need anything more specially designed for each data structure, you'll need to implement your own version of this function or the transforms. Args: annotation (dict): dict of instance annotations for a single instance. It will be modified in-place. transforms (TransformList or list[Transform]): image_size (tuple): the height, width of the transformed image keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. Returns: dict: the same input dict with fields "bbox", "segmentation", "keypoints" transformed according to `transforms`. The "bbox_mode" field will be set to XYXY_ABS. """ if isinstance(transforms, (tuple, list)): transforms = T.TransformList(transforms) # if "bbox" in annotation and annotation["bbox"] is not None: # # bbox is 1d (per-instance bounding box) # bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) # # clip transformed bbox to image size # bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0) # annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1]) # annotation["bbox_mode"] = BoxMode.XYXY_ABS if "segmentation" in annotation: # each instance contains 1 or more polygons segm = annotation["segmentation"] if isinstance(segm, list): # polygons polygons = [np.asarray(p).reshape(-1, 2) for p in segm] annotation["segmentation"] = [ p.reshape(-1) for p in transforms.apply_polygons(polygons) ] elif isinstance(segm, dict): # RLE mask = mask_util.decode(segm) mask = transforms.apply_segmentation(mask) assert tuple(mask.shape[:2]) == image_size, f"mask.shape: {mask.shape}, image_size: {image_size}" annotation["segmentation"] = mask else: raise ValueError( "Cannot transform segmentation of type '{}'!" "Supported types are: polygons as list[list[float] or ndarray]," " COCO-style RLE as a dict.".format(type(segm)) ) return annotation def build_augmentation(cfg, is_train): """ Create a list of default :class:`Augmentation` from config. Now it includes resizing and flipping. Returns: list[Augmentation] """ if is_train: min_size = cfg.INPUT.MIN_SIZE_TRAIN max_size = cfg.INPUT.MAX_SIZE_TRAIN sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING else: min_size = cfg.INPUT.MIN_SIZE_TEST max_size = cfg.INPUT.MAX_SIZE_TEST sample_style = "choice" augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)] if is_train and cfg.INPUT.RANDOM_FLIP != "none": augmentation.append( T.RandomFlip( horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", vertical=cfg.INPUT.RANDOM_FLIP == "vertical", ) ) return augmentation build_transform_gen = build_augmentation """ Alias for backward-compatibility. """