| | |
| | |
| |
|
| | """ |
| | See "Data Augmentation" tutorial for an overview of the system: |
| | https://detectron2.readthedocs.io/tutorials/augmentation.html |
| | """ |
| |
|
| | import numpy as np |
| | import torch |
| | import torch.nn.functional as F |
| | from fvcore.transforms.transform import ( |
| | CropTransform, |
| | HFlipTransform, |
| | NoOpTransform, |
| | Transform, |
| | TransformList, |
| | ) |
| | from PIL import Image |
| |
|
| | try: |
| | import cv2 |
| | except ImportError: |
| | |
| | pass |
| |
|
# Public API of this module: only the Transform subclasses defined below are
# exported by ``from <module> import *``.
__all__ = [
    "ExtentTransform",
    "ResizeTransform",
    "RotationTransform",
    "ColorTransform",
    "PILColorTransform",
]
| |
|
| |
|
class ExtentTransform(Transform):
    """
    Extracts a subregion from the source image and scales it to the output size.

    The fill color is used to map pixels from the source rect that fall outside
    the source image.

    See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform
    """

    def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0):
        """
        Args:
            src_rect (x0, y0, x1, y1): src coordinates
            output_size (h, w): dst image size
            interp: PIL interpolation methods
            fill: Fill color used when src_rect extends outside image
        """
        super().__init__()
        # Stores the constructor arguments; the methods below read them back
        # as self.src_rect, self.output_size, self.interp and self.fill.
        self._set_attributes(locals())

    def apply_image(self, img, interp=None):
        """
        Extract ``src_rect`` from ``img`` and scale it to ``output_size``.

        Args:
            img (ndarray): array accepted by ``Image.fromarray``. An array
                whose third dimension has size 1 is handled as a
                single-channel ("L") image and the trailing dimension is
                restored on the result.
            interp: PIL interpolation method overriding ``self.interp``.
                ``None`` means "use ``self.interp``".

        Returns:
            ndarray: the transformed image.
        """
        h, w = self.output_size
        # Test against None explicitly. Image.NEAREST has the integer value 0,
        # so a truthiness test would discard an explicit NEAREST request and
        # silently fall back to self.interp (e.g. bilinear), which blends
        # label values when called from apply_segmentation.
        resample = self.interp if interp is None else interp
        single_channel = len(img.shape) > 2 and img.shape[2] == 1

        if single_channel:
            pil_image = Image.fromarray(img[:, :, 0], mode="L")
        else:
            pil_image = Image.fromarray(img)
        pil_image = pil_image.transform(
            size=(w, h),  # PIL takes (width, height)
            method=Image.EXTENT,
            data=self.src_rect,
            resample=resample,
            fill=self.fill,
        )
        ret = np.asarray(pil_image)
        if single_channel:
            # Put back the channel axis that was dropped for PIL.
            ret = np.expand_dims(ret, -1)
        return ret

    def apply_coords(self, coords):
        """
        Map (x, y) points from source-image coordinates to output coordinates.

        Args:
            coords (ndarray): array whose column 0 is x and column 1 is y.

        Returns:
            ndarray: a float32 copy of ``coords`` with the transform applied;
            the input array is not modified.
        """
        # Express the points relative to the center of the source rect, scale
        # them by the output/source size ratio, then shift the origin so the
        # rect center lands on the center of the output image.
        h, w = self.output_size
        x0, y0, x1, y1 = self.src_rect
        new_coords = coords.astype(np.float32)
        new_coords[:, 0] -= 0.5 * (x0 + x1)
        new_coords[:, 1] -= 0.5 * (y0 + y1)
        new_coords[:, 0] *= w / (x1 - x0)
        new_coords[:, 1] *= h / (y1 - y0)
        new_coords[:, 0] += 0.5 * w
        new_coords[:, 1] += 0.5 * h
        return new_coords

    def apply_segmentation(self, segmentation):
        """
        Apply the transform to a segmentation map using nearest-neighbor
        resampling, so that output values are never interpolated between
        different labels.
        """
        segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
        return segmentation
| |
|
| |
|
class ResizeTransform(Transform):
    """
    Resize the image to a target size.
    """

    def __init__(self, h, w, new_h, new_w, interp=None):
        """
        Args:
            h, w (int): original image size
            new_h, new_w (int): new image size
            interp: PIL interpolation methods, defaults to bilinear.
        """
        super().__init__()
        interp = Image.BILINEAR if interp is None else interp
        # Stores the arguments; methods below read self.h, self.w,
        # self.new_h, self.new_w and self.interp.
        self._set_attributes(locals())

    def apply_image(self, img, interp=None):
        """
        Resize ``img`` from (h, w) to (new_h, new_w).

        Args:
            img (ndarray): array whose first two dimensions are (h, w) and
                which has at most 4 dimensions.
            interp: PIL interpolation method overriding ``self.interp``.

        Returns:
            ndarray: the resized array.
        """
        assert img.shape[:2] == (self.h, self.w)
        assert len(img.shape) <= 4
        resample = self.interp if interp is None else interp

        if img.dtype != np.uint8:
            # Non-uint8 arrays are resized with torch instead of PIL.
            # torch.from_numpy cannot wrap arrays with negative strides
            # (e.g. flipped views), so make a contiguous copy first.
            if any(stride < 0 for stride in img.strides):
                img = np.ascontiguousarray(img)
            tensor = torch.from_numpy(img)
            out_shape = list(tensor.shape)
            # Pad with singleton dims after (H, W) so the tensor is 4D, then
            # move H and W to the last two axes as F.interpolate expects.
            padded_shape = out_shape[:2] + [1] * (4 - len(out_shape)) + out_shape[2:]
            tensor = tensor.view(padded_shape).permute(2, 3, 0, 1)
            pil_to_torch_mode = {
                Image.NEAREST: "nearest",
                Image.BILINEAR: "bilinear",
                Image.BICUBIC: "bicubic",
            }
            mode = pil_to_torch_mode[resample]
            align_corners = None if mode == "nearest" else False
            tensor = F.interpolate(
                tensor, (self.new_h, self.new_w), mode=mode, align_corners=align_corners
            )
            out_shape[:2] = (self.new_h, self.new_w)
            # Undo the permutation and drop the padding dims again.
            return tensor.permute(2, 3, 0, 1).view(out_shape).numpy()

        # uint8 images are resized with PIL.
        squeeze_channel = len(img.shape) > 2 and img.shape[2] == 1
        if squeeze_channel:
            pil_image = Image.fromarray(img[:, :, 0], mode="L")
        else:
            pil_image = Image.fromarray(img)
        resized = np.asarray(pil_image.resize((self.new_w, self.new_h), resample))
        if squeeze_channel:
            resized = np.expand_dims(resized, -1)
        return resized

    def apply_coords(self, coords):
        """
        Scale (x, y) points by the resize ratio. ``coords`` is modified in
        place and returned.
        """
        scale_x = self.new_w * 1.0 / self.w
        scale_y = self.new_h * 1.0 / self.h
        coords[:, 0] = coords[:, 0] * scale_x
        coords[:, 1] = coords[:, 1] * scale_y
        return coords

    def apply_segmentation(self, segmentation):
        """
        Resize a segmentation map with nearest-neighbor interpolation.
        """
        return self.apply_image(segmentation, interp=Image.NEAREST)

    def inverse(self):
        """
        Return the transform that resizes from (new_h, new_w) back to (h, w)
        with the same interpolation method.
        """
        return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp)
| |
|
| |
|
class RotationTransform(Transform):
    """
    This method returns a copy of this image, rotated the given
    number of degrees counter clockwise around its center.
    """

    def __init__(self, h, w, angle, expand=True, center=None, interp=None):
        """
        Args:
            h, w (int): original image size
            angle (float): degrees for rotation
            expand (bool): choose if the image should be resized to fit the whole
                rotated image (default), or simply cropped
            center (tuple (width, height)): coordinates of the rotation center
                if left to None, the center will be fit to the center of each image
                center has no effect if expand=True because it only affects shifting
            interp: cv2 interpolation method, default cv2.INTER_LINEAR
        """
        super().__init__()
        image_center = np.array((w / 2, h / 2))
        center = image_center if center is None else center
        interp = cv2.INTER_LINEAR if interp is None else interp
        abs_cos = abs(np.cos(np.deg2rad(angle)))
        abs_sin = abs(np.sin(np.deg2rad(angle)))
        if expand:
            # Size of the axis-aligned box that contains the whole rotated image.
            bound_w, bound_h = np.rint(
                [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin]
            ).astype(int)
        else:
            bound_w, bound_h = w, h

        # locals() is handed to _set_attributes, and the methods below read
        # image_center, center, interp, bound_w, bound_h, ... back from self,
        # so the local names above must stay exactly as they are.
        self._set_attributes(locals())
        # Matrix used for point coordinates.
        self.rm_coords = self.create_rotation_matrix()
        # Matrix used for image warping, built with a -0.5 offset.
        # NOTE(review): presumably compensates for cv2.warpAffine's
        # pixel-center convention -- confirm.
        self.rm_image = self.create_rotation_matrix(offset=-0.5)

    def apply_image(self, img, interp=None):
        """
        img should be a numpy array, formatted as Height * Width * Nchannels
        """
        # Empty input or a rotation by a multiple of 360 degrees: nothing to do.
        if len(img) == 0:
            return img
        if self.angle % 360 == 0:
            return img
        assert img.shape[:2] == (self.h, self.w)
        flags = self.interp if interp is None else interp
        output_size = (self.bound_w, self.bound_h)
        return cv2.warpAffine(img, self.rm_image, output_size, flags=flags)

    def apply_coords(self, coords):
        """
        coords should be a N * 2 array-like, containing N couples of (x, y) points
        """
        coords = np.asarray(coords, dtype=float)
        if len(coords) == 0:
            return coords
        if self.angle % 360 == 0:
            return coords
        # cv2.transform is fed an (N, 1, 2) array; drop the middle axis afterwards.
        rotated = cv2.transform(coords[:, np.newaxis, :], self.rm_coords)
        return rotated[:, 0, :]

    def apply_segmentation(self, segmentation):
        """
        Rotate a segmentation map with nearest-neighbor interpolation.
        """
        return self.apply_image(segmentation, interp=cv2.INTER_NEAREST)

    def create_rotation_matrix(self, offset=0):
        """
        Build the 2x3 affine matrix for this rotation.

        Args:
            offset: value added to both coordinates of the rotation center
                (and of the image / bound centers when ``expand`` is set).

        Returns:
            ndarray: matrix from ``cv2.getRotationMatrix2D``, with its
            translation column adjusted when ``expand`` is set.
        """
        pivot = (self.center[0] + offset, self.center[1] + offset)
        matrix = cv2.getRotationMatrix2D(pivot, self.angle, 1)
        if not self.expand:
            return matrix

        # Find where the original image center ends up after rotation, then
        # shift the translation so that point lands on the center of the
        # (bound_w, bound_h) output canvas.
        rotated_center = cv2.transform(self.image_center[None, None, :] + offset, matrix)[
            0, 0, :
        ]
        shift = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rotated_center
        matrix[:, 2] += shift
        return matrix

    def inverse(self):
        """
        The inverse is to rotate it back with expand, and crop to get the original shape.
        """
        if not self.expand:
            raise NotImplementedError()
        rotation = RotationTransform(
            self.bound_h, self.bound_w, -self.angle, True, None, self.interp
        )
        # Crop the centered (w, h) window out of the re-expanded canvas.
        crop_x0 = (rotation.bound_w - self.w) // 2
        crop_y0 = (rotation.bound_h - self.h) // 2
        crop = CropTransform(crop_x0, crop_y0, self.w, self.h)
        return TransformList([rotation, crop])
| |
|
| |
|
class ColorTransform(Transform):
    """
    Generic wrapper for any photometric transforms.
    These transformations should only affect the color space and
    not the coordinate space of the image (e.g. annotation
    coordinates such as bounding boxes should not be changed)
    """

    def __init__(self, op):
        """
        Args:
            op (Callable): operation to be applied to the image,
                which takes in an ndarray and returns an ndarray.

        Raises:
            ValueError: if ``op`` is not callable.
        """
        if not callable(op):
            raise ValueError("op parameter should be callable")
        super().__init__()
        # Stores ``op``; apply_image reads it back as self.op.
        self._set_attributes(locals())

    def apply_image(self, img):
        """Return the result of calling ``op`` on the image."""
        return self.op(img)

    def apply_coords(self, coords):
        """Coordinates are returned unchanged."""
        return coords

    def apply_segmentation(self, segmentation):
        """Segmentation maps are returned unchanged."""
        return segmentation

    def inverse(self):
        """Return a ``NoOpTransform`` as the inverse."""
        return NoOpTransform()
| |
|
| |
|
class PILColorTransform(ColorTransform):
    """
    Generic wrapper for PIL Photometric image transforms,
    which affect the color space and not the coordinate
    space of the image
    """

    def __init__(self, op):
        """
        Args:
            op (Callable): operation to be applied to the image,
                which takes in a PIL Image and returns a transformed
                PIL Image.
                For reference on possible operations see:
                - https://pillow.readthedocs.io/en/stable/

        Raises:
            ValueError: if ``op`` is not callable.
        """
        if not callable(op):
            raise ValueError("op parameter should be callable")
        super().__init__(op)

    def apply_image(self, img):
        """
        Convert ``img`` to a PIL Image, run ``op`` on it, and return the
        result converted back with ``np.asarray``.
        """
        pil_img = Image.fromarray(img)
        transformed = super().apply_image(pil_img)
        return np.asarray(transformed)
| |
|
| |
|
def HFlip_rotated_box(transform, rotated_boxes):
    """
    Apply the horizontal flip transform on rotated boxes.

    The array is modified in place and also returned.

    Args:
        transform: object providing ``width``, the width of the flipped image.
        rotated_boxes (ndarray): Nx5 floating point array of
            (x_center, y_center, width, height, angle_degrees) format
            in absolute coordinates.
    """
    x_centers = rotated_boxes[:, 0]
    angles = rotated_boxes[:, 4]
    # Mirror the box center across the vertical axis of the image.
    rotated_boxes[:, 0] = transform.width - x_centers
    # A horizontal mirror reverses the direction of rotation.
    rotated_boxes[:, 4] = -angles
    return rotated_boxes
| |
|
| |
|
def Resize_rotated_box(transform, rotated_boxes):
    """
    Apply the resizing transform on rotated boxes. For details of how these (approximation)
    formulas are derived, please refer to :meth:`RotatedBoxes.scale`.

    The array is modified in place and also returned.

    Args:
        transform: object providing ``w``, ``h``, ``new_w`` and ``new_h``.
        rotated_boxes (ndarray): Nx5 floating point array of
            (x_center, y_center, width, height, angle_degrees) format
            in absolute coordinates.
    """
    sx = transform.new_w * 1.0 / transform.w
    sy = transform.new_h * 1.0 / transform.h

    # Box centers scale independently along each axis.
    rotated_boxes[:, 0] *= sx
    rotated_boxes[:, 1] *= sy

    # Angles in radians, taken before column 4 is overwritten below.
    angle_rad = rotated_boxes[:, 4] * np.pi / 180.0
    cos_a = np.cos(angle_rad)
    sin_a = np.sin(angle_rad)

    width_scale = np.sqrt(np.square(sx * cos_a) + np.square(sy * sin_a))
    height_scale = np.sqrt(np.square(sx * sin_a) + np.square(sy * cos_a))
    rotated_boxes[:, 2] *= width_scale
    rotated_boxes[:, 3] *= height_scale
    rotated_boxes[:, 4] = np.arctan2(sx * sin_a, sy * cos_a) * 180 / np.pi

    return rotated_boxes
| |
|
| |
|
# Register handlers for the "rotated_box" data type on the transforms that
# have a rotated-box implementation in this module.
HFlipTransform.register_type("rotated_box", HFlip_rotated_box)
ResizeTransform.register_type("rotated_box", Resize_rotated_box)

# A no-op transform returns rotated boxes unchanged.
NoOpTransform.register_type("rotated_box", lambda t, x: x)
| |
|