# Listing metadata: file size 9,760 bytes, revision 032e687.
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
"""
Common data processing utilities that are used in a
typical object detection data pipeline.
"""
import logging
import numpy as np
from typing import List, Union
import pycocotools.mask as mask_util
import torch
from PIL import Image
from .detectron2.structures import (
Boxes,
BoxMode,
)
from .detectron2.utils.file_io import PathManager
from .detectron2.data2 import transforms as T
# Public names exported by a star-import of this module.
# NOTE(review): "transform_proposals", "annotations_to_instances",
# "annotations_to_instances_rotated", "create_keypoint_hflip_indices" and
# "filter_empty_instances" have no definition in the part of the file visible
# here. If they are not defined elsewhere in this module, a star-import will
# fail with AttributeError -- confirm, then define them or drop them.
__all__ = [
    "SizeMismatchError",
    "convert_image_to_rgb",
    "check_image_size",
    "transform_proposals",
    "transform_instance_annotations",
    "annotations_to_instances",
    "annotations_to_instances_rotated",
    "build_augmentation",
    "build_transform_gen",
    "create_keypoint_hflip_indices",
    "filter_empty_instances",
    "read_image",
]
class SizeMismatchError(ValueError):
    """Raised when a loaded image's width/height differs from its annotation."""
# Colour-space matrices for BT.601, one output channel per row.
# Reference: https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601
_M_RGB2YUV = [
    [0.299, 0.587, 0.114],
    [-0.14713, -0.28886, 0.436],
    [0.615, -0.51499, -0.10001],
]
_M_YUV2RGB = [
    [1.0, 0.0, 1.13983],
    [1.0, -0.39465, -0.58060],
    [1.0, 2.03211, 0.0],
]

# Numeric id of the exif 'Orientation' tag, see https://www.exiv2.org/tags.html
_EXIF_ORIENT = 274
def convert_PIL_to_numpy(image, format):
    """
    Convert PIL image to numpy array of target format.

    Args:
        image (PIL.Image): a PIL image
        format (str or None): the format of output image. One of ``None``
            (no mode conversion, the image is turned into an array as-is),
            "L", "RGB", "BGR" or "YUV-BT.601".

    Returns:
        (np.ndarray): the image as an array. "L" gets a trailing channel axis
            added, "BGR" is the RGB array with its last axis reversed, and
            "YUV-BT.601" is a float array computed from RGB scaled to 0-1.

    Raises:
        ValueError: if `format` is not one of the values listed above.
    """
    # Validate first so an unsupported format fails before any conversion work.
    # `None` must be accepted here: it is handled explicitly below and is the
    # default format of `read_image`.
    if format not in (None, "L", "RGB", "BGR", "YUV-BT.601"):
        raise ValueError(f"Unsupported image format: {format}")

    if format is not None:
        # PIL has no BGR / YUV-BT.601 mode: convert to RGB and post-process below
        conversion_format = "RGB" if format in ("BGR", "YUV-BT.601") else format
        image = image.convert(conversion_format)
    image = np.asarray(image)

    if format == "L":
        # PIL squeezes out the channel dimension for "L", so make it HWC
        image = np.expand_dims(image, -1)
    elif format == "BGR":
        # flip the channel axis: RGB -> BGR
        image = image[:, :, ::-1]
    elif format == "YUV-BT.601":
        image = image / 255.0
        image = np.dot(image, np.array(_M_RGB2YUV).T)
    return image
def convert_image_to_rgb(image, format):
    """
    Turn an image stored in `format` into an RGB image.

    Args:
        image (np.ndarray or Tensor): an HWC image; a Tensor is moved to CPU
            and converted to a numpy array first.
        format (str): the format of input image, also see `read_image`

    Returns:
        (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8
    """
    if isinstance(image, torch.Tensor):
        image = image.cpu().numpy()

    if format == "BGR":
        # pick channels in reversed order
        return image[:, :, [2, 1, 0]]

    if format == "YUV-BT.601":
        yuv_to_rgb = np.array(_M_YUV2RGB).T
        rgb = np.dot(image, yuv_to_rgb)
        return rgb * 255.0

    # Every other format is delegated to PIL.
    # "L" input is HWC with one channel; PIL wants a plain HW array for it.
    pixels = image[:, :, 0] if format == "L" else image
    pil_image = Image.fromarray(pixels.astype(np.uint8), mode=format)
    return np.asarray(pil_image.convert("RGB"))
def _apply_exif_orientation(image):
    """
    Transpose an image according to its exif 'Orientation' tag.

    This is a stand-in for `ImageOps.exif_transpose`, kept because of
    https://github.com/python-pillow/Pillow/issues/3973 (the Pillow source
    raises errors with various methods, especially `tobytes`).

    Function based on:
    https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59
    https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527

    Args:
        image (PIL.Image): a PIL image

    Returns:
        (PIL.Image): the transposed image when the orientation tag maps to a
            transpose method; otherwise the input image itself.
    """
    if not hasattr(image, "getexif"):
        return image

    try:
        exif = image.getexif()
    except Exception:  # https://github.com/facebookresearch/detectron2/issues/1885
        # best effort: unreadable exif data means "leave the image alone"
        return image
    if exif is None:
        return image

    orientation = exif.get(_EXIF_ORIENT)
    # orientation tag value -> PIL transpose method; other values map to nothing
    transpose_by_orientation = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }
    method = transpose_by_orientation.get(orientation)
    return image if method is None else image.transpose(method)
def read_image(file_name, format=None):
    """
    Load an image file and return it as an array in the requested format.

    The file is opened through `PathManager`, decoded with PIL, passed
    through `_apply_exif_orientation` (rotation / flipping from exif data),
    and finally handed to `convert_PIL_to_numpy`.

    Args:
        file_name (str): image file path
        format (str): output format, forwarded unchanged to
            `convert_PIL_to_numpy`; see that function for the accepted values.

    Returns:
        image (np.ndarray): the array produced by `convert_PIL_to_numpy`.
    """
    with PathManager.open(file_name, "rb") as stream:
        # exif is applied by hand to work around
        # https://github.com/python-pillow/Pillow/issues/3973
        pil_image = _apply_exif_orientation(Image.open(stream))
        return convert_PIL_to_numpy(pil_image, format)
def check_image_size(dataset_dict, image):
    """
    Raise an error if the image does not match the size specified in the dict.

    Only the dimensions actually present in `dataset_dict` are checked. A
    missing "width" and/or "height" is filled in from the image, so a dict
    carrying just one of the two keys is handled instead of raising KeyError.

    Args:
        dataset_dict (dict): may contain "width", "height" and "file_name".
            Modified in place: missing "width"/"height" are set from `image`.
        image: an object with a `.shape` whose first entry is the height and
            second entry is the width.

    Raises:
        SizeMismatchError: if a "width"/"height" present in the dict differs
            from the image's size.
    """
    image_wh = (image.shape[1], image.shape[0])
    # A dimension absent from the dict defaults to the image's own value, so
    # it can never cause a mismatch.
    expected_wh = (
        dataset_dict.get("width", image_wh[0]),
        dataset_dict.get("height", image_wh[1]),
    )
    if image_wh != expected_wh:
        raise SizeMismatchError(
            "Mismatched image shape{}, got {}, expect {}.".format(
                " for image " + dataset_dict["file_name"]
                if "file_name" in dataset_dict
                else "",
                image_wh,
                expected_wh,
            )
            + " Please check the width/height in your annotation."
        )

    # To ensure bbox always remap to original image size
    dataset_dict.setdefault("width", image_wh[0])
    dataset_dict.setdefault("height", image_wh[1])
def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """
    Apply transforms to the segmentation annotation of a single instance.

    Polygon segmentations go through `transforms.apply_polygons`; RLE
    segmentations are decoded and go through `transforms.apply_segmentation`.
    Box handling is commented out in this version, so "bbox" / "bbox_mode"
    are returned untouched, and nothing here reads "keypoints".

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]): a tuple/list is wrapped
            into a `T.TransformList` first.
        image_size (tuple): the height, width of the transformed image; only
            used to check the shape of a transformed RLE mask.
        keypoint_hflip_indices (ndarray[int]): accepted for interface
            compatibility; not used by this implementation.

    Returns:
        dict:
            the same input dict, with "segmentation" (if present) replaced by
            flattened transformed polygons or by the transformed mask array.

    Raises:
        ValueError: if "segmentation" is neither a list nor a dict.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)

    # Box handling (convert to XYXY_ABS, apply_box, clip to the image) is
    # disabled here; "bbox" and "bbox_mode" pass through unchanged.

    if "segmentation" not in annotation:
        return annotation

    # each instance contains 1 or more polygons, or a single RLE dict
    segm = annotation["segmentation"]
    if isinstance(segm, list):
        # polygons: flat coordinate lists -> (N, 2) arrays -> flat again
        point_arrays = []
        for poly in segm:
            point_arrays.append(np.asarray(poly).reshape(-1, 2))
        transformed = transforms.apply_polygons(point_arrays)
        annotation["segmentation"] = [poly.reshape(-1) for poly in transformed]
    elif isinstance(segm, dict):
        # RLE: decode to a mask, transform it, store the mask itself
        decoded = mask_util.decode(segm)
        mask = transforms.apply_segmentation(decoded)
        assert tuple(mask.shape[:2]) == image_size, f"mask.shape: {mask.shape}, image_size: {image_size}"
        annotation["segmentation"] = mask
    else:
        raise ValueError(
            "Cannot transform segmentation of type '{}'!"
            "Supported types are: polygons as list[list[float] or ndarray],"
            " COCO-style RLE as a dict.".format(type(segm))
        )
    return annotation
def build_augmentation(cfg, is_train):
    """
    Build the default list of :class:`Augmentation` from a config.

    The list always starts with a `T.ResizeShortestEdge`. In training mode a
    `T.RandomFlip` is appended unless `cfg.INPUT.RANDOM_FLIP` is "none".

    Args:
        cfg: config object; the `cfg.INPUT.*` size, sampling and flip settings
            are read from it.
        is_train (bool): selects the TRAIN settings (and enables flipping)
            instead of the TEST settings with "choice" sampling.

    Returns:
        list[Augmentation]
    """
    input_cfg = cfg.INPUT

    if not is_train:
        # test time: fixed sampling style, never flip
        return [T.ResizeShortestEdge(input_cfg.MIN_SIZE_TEST, input_cfg.MAX_SIZE_TEST, "choice")]

    augmentation = [
        T.ResizeShortestEdge(
            input_cfg.MIN_SIZE_TRAIN,
            input_cfg.MAX_SIZE_TRAIN,
            input_cfg.MIN_SIZE_TRAIN_SAMPLING,
        )
    ]
    flip_mode = input_cfg.RANDOM_FLIP
    if flip_mode != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=flip_mode == "horizontal",
                vertical=flip_mode == "vertical",
            )
        )
    return augmentation
# Old name of `build_augmentation`; both names refer to the same function.
build_transform_gen = build_augmentation
"""
Alias for backward-compatibility.
"""