""" |
|
|
Common data processing utilities that are used in a |
|
|
typical object detection data pipeline. |
|
|
""" |
|
|
import logging |
|
|
import numpy as np |
|
|
from typing import List, Union |
|
|
import pycocotools.mask as mask_util |
|
|
import torch |
|
|
from PIL import Image |
|
|
|
|
|
from .detectron2.structures import ( |
|
|
Boxes, |
|
|
BoxMode, |
|
|
) |
|
|
from .detectron2.utils.file_io import PathManager |
from .detectron2.data import transforms as T

__all__ = [
    "SizeMismatchError",
    "convert_image_to_rgb",
    "check_image_size",
    "transform_proposals",
    "transform_instance_annotations",
    "annotations_to_instances",
    "annotations_to_instances_rotated",
    "build_augmentation",
    "build_transform_gen",
    "create_keypoint_hflip_indices",
    "filter_empty_instances",
    "read_image",
]


class SizeMismatchError(ValueError):
    """
    When the loaded image has a different width/height than the annotation.
    """


# RGB <-> YUV (BT.601) conversion matrices
_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]]
_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]]

# exif 'Orientation' tag
_EXIF_ORIENT = 274


def convert_PIL_to_numpy(image, format):
    """
    Convert PIL image to numpy array of target format.

    Args:
        image (PIL.Image): a PIL image
        format (str): the format of output image

    Returns:
        (np.ndarray): also see `read_image`
    """
    if format is not None:
        # PIL only supports RGB, so convert to RGB and flip channels over below
        conversion_format = format
        if format in ["BGR", "YUV-BT.601"]:
            conversion_format = "RGB"
        image = image.convert(conversion_format)
    image = np.asarray(image)

    # PIL squeezes out the channel dimension for "L", so make it HWC
    if format == "L":
        image = np.expand_dims(image, -1)
    # handle formats not supported by PIL
    elif format == "BGR":
        # flip channels from RGB to BGR
        image = image[:, :, ::-1]
    elif format == "YUV-BT.601":
        image = image / 255.0
        image = np.dot(image, np.array(_M_RGB2YUV).T)
    elif format is not None and format != "RGB":
        # format=None means "keep the PIL mode as-is"; without the None check,
        # the default of `read_image` would incorrectly raise here.
        raise ValueError(f"Unsupported image format: {format}")

    return image


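# Usage sketch (not part of the original file; "demo.jpg" is a hypothetical path):
#
#     pil_img = Image.open("demo.jpg")
#     arr = convert_PIL_to_numpy(pil_img, "BGR")  # HWC uint8 array, BGR channel order
#     arr = convert_PIL_to_numpy(pil_img, "L")    # (H, W, 1) grayscale array

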
def convert_image_to_rgb(image, format):
    """
    Convert an image from given format to RGB.

    Args:
        image (np.ndarray or Tensor): an HWC image
        format (str): the format of input image, also see `read_image`

    Returns:
        (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8
    """
    if isinstance(image, torch.Tensor):
        image = image.cpu().numpy()
    if format == "BGR":
        image = image[:, :, [2, 1, 0]]
    elif format == "YUV-BT.601":
        image = np.dot(image, np.array(_M_YUV2RGB).T)
        image = image * 255.0
    else:
        if format == "L":
            image = image[:, :, 0]
        image = image.astype(np.uint8)
        image = np.asarray(Image.fromarray(image, mode=format).convert("RGB"))
    return image


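# Usage sketch (not part of the original file): round-trip a BGR array back to RGB.
#
#     bgr = read_image("demo.jpg", format="BGR")  # hypothetical path
#     rgb = convert_image_to_rgb(bgr, "BGR")      # (H, W, 3) RGB, uint8

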
def _apply_exif_orientation(image):
    """
    Applies the exif orientation correctly.

    This code exists per the bug:
      https://github.com/python-pillow/Pillow/issues/3973
    with the function `ImageOps.exif_transpose`. The Pillow source raises errors with
    various methods, especially `tobytes`.

    Function based on:
      https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59
      https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527

    Args:
        image (PIL.Image): a PIL image

    Returns:
        (PIL.Image): the PIL image with exif orientation applied, if applicable
    """
    if not hasattr(image, "getexif"):
        return image

    try:
        exif = image.getexif()
    except Exception:  # reading EXIF can fail on corrupt metadata
        exif = None

    if exif is None:
        return image

    orientation = exif.get(_EXIF_ORIENT)

    method = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }.get(orientation)

    if method is not None:
        return image.transpose(method)
    return image


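# Behavior sketch (not part of the original file): an image whose EXIF orientation
# tag (274) is 3 comes back rotated by 180 degrees.
#
#     img = Image.open("photo.jpg")         # hypothetical file with EXIF tag 274 == 3
#     fixed = _apply_exif_orientation(img)  # same result as img.transpose(Image.ROTATE_180)

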
def read_image(file_name, format=None):
    """
    Read an image into the given format.
    Will apply rotation and flipping if the image has such exif information.

    Args:
        file_name (str): image file path
        format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601".

    Returns:
        image (np.ndarray):
            an HWC image in the given format, which is 0-255, uint8 for
            supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601.
    """
    with PathManager.open(file_name, "rb") as f:
        image = Image.open(f)

        # work around this bug: https://github.com/python-pillow/Pillow/issues/3973
        image = _apply_exif_orientation(image)
        return convert_PIL_to_numpy(image, format)


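# Usage sketch (not part of the original file; the path is hypothetical):
#
#     img = read_image("demo.jpg", format="BGR")
#     img.shape, img.dtype  # ((H, W, 3), dtype('uint8'))

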
def check_image_size(dataset_dict, image):
    """
    Raise an error if the image does not match the size specified in the dict.
    """
    if "width" in dataset_dict or "height" in dataset_dict:
        image_wh = (image.shape[1], image.shape[0])
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        if not image_wh == expected_wh:
            raise SizeMismatchError(
                "Mismatched image shape{}, got {}, expect {}.".format(
                    " for image " + dataset_dict["file_name"]
                    if "file_name" in dataset_dict
                    else "",
                    image_wh,
                    expected_wh,
                )
                + " Please check the width/height in your annotation."
            )

    # To ensure bbox always remap to original image size
    if "width" not in dataset_dict:
        dataset_dict["width"] = image.shape[1]
    if "height" not in dataset_dict:
        dataset_dict["height"] = image.shape[0]


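# Usage sketch (not part of the original file): typically called right after loading
# an image inside a dataset mapper.
#
#     dataset_dict = {"file_name": "demo.jpg", "width": 640, "height": 480}
#     image = read_image(dataset_dict["file_name"], format="BGR")
#     check_image_size(dataset_dict, image)  # raises SizeMismatchError on mismatch

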
def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # COCO-style RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size, f"mask.shape: {mask.shape}, image_size: {image_size}"
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm))
            )

    return annotation


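# Usage sketch (not part of the original file): apply a horizontal flip to one
# COCO-style annotation. `HFlipTransform` is a standard detectron2 transform.
#
#     flip = T.TransformList([T.HFlipTransform(width=640)])
#     anno = {"bbox": [10.0, 20.0, 50.0, 80.0], "bbox_mode": BoxMode.XYWH_ABS}
#     anno = transform_instance_annotations(anno, flip, image_size=(480, 640))
#     anno["bbox"], anno["bbox_mode"]  # flipped box in XYXY_ABS mode

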
def build_augmentation(cfg, is_train):
    """
    Create a list of default :class:`Augmentation` from config.
    Now it includes resizing and flipping.

    Returns:
        list[Augmentation]
    """
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"
    augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)]
    if is_train and cfg.INPUT.RANDOM_FLIP != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal",
                vertical=cfg.INPUT.RANDOM_FLIP == "vertical",
            )
        )
    return augmentation


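# Usage sketch (not part of the original file): with detectron2's default config,
# training gets a resize plus a horizontal flip, inference only the resize.
# The import below assumes the standalone detectron2 package layout.
#
#     from detectron2.config import get_cfg
#     cfg = get_cfg()
#     build_augmentation(cfg, is_train=True)   # [ResizeShortestEdge(...), RandomFlip(...)]
#     build_augmentation(cfg, is_train=False)  # [ResizeShortestEdge(...)]

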
build_transform_gen = build_augmentation
"""
Alias for backward-compatibility.
"""