| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| """ |
| Data augmentation functionality. Passed as callable transformations to |
| Dataset classes. |
| |
| The data augmentation procedures were interpreted from @weiliu89's SSD paper |
| http://arxiv.org/abs/1512.02325 |
| """ |
|
|
| import math |
| import random |
|
|
| import cv2 |
| import numpy as np |
|
|
| from object_detection.pt.src.utils.yolod import xyxy2cxcywh |
|
|
|
|
def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Randomly jitter hue, saturation and value of a BGR image, in place.

    Each channel's gain is drawn uniformly from [-gain, gain] and then
    independently zeroed with 50% probability by the random 0/1 mask, so
    any subset of the three channels may be left untouched.
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]
    gains = gains * np.random.randint(0, 2, 3)  # randomly disable each channel
    gains = gains.astype(np.int16)

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
    # Hue wraps around OpenCV's 0..179 range; S and V saturate at 0..255.
    hsv[..., 0] = (hsv[..., 0] + gains[0]) % 180
    hsv[..., 1] = np.clip(hsv[..., 1] + gains[1], 0, 255)
    hsv[..., 2] = np.clip(hsv[..., 2] + gains[2], 0, 255)

    # Convert back and write directly into the caller's array (no return).
    cv2.cvtColor(hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)
|
|
|
|
def get_aug_params(value, center=0):
    """Sample a random augmentation parameter.

    Args:
        value: either a single number ``v`` — sample uniformly from
            ``[center - v, center + v]`` — or a two-element sequence
            ``(lo, hi)`` — sample uniformly from ``[lo, hi]``.
        center: midpoint used for the single-number form.

    Returns:
        A float drawn from the corresponding uniform range.

    Raises:
        ValueError: if ``value`` is neither a number nor a length-2
            sequence.
    """
    # Accept plain ints as well as floats: callers in this module pass
    # literals such as ``degrees=10``, which the previous float-only check
    # rejected with a confusing ``TypeError`` from ``len(10)``.
    if isinstance(value, (int, float)):
        return random.uniform(center - value, center + value)
    if len(value) == 2:
        return random.uniform(value[0], value[1])
    raise ValueError(
        "Affine params should be either a sequence containing two values"
        " or single float values. Got {}".format(value)
    )
|
|
|
|
def get_affine_matrix(
    target_size,
    degrees=10.0,
    translate=0.1,
    scales=0.1,
    shear=10.0,
):
    """Compose a random 2x3 affine matrix (rotation+scale, shear, translation).

    Args:
        target_size: (width, height) of the output image; scales the random
            translation offsets.
        degrees: rotation range in degrees, sampled via ``get_aug_params``.
        translate: translation range as a fraction of the target size.
        scales: scale jitter range, centered at 1.0.
        shear: shear range in degrees, sampled independently per axis.

    Returns:
        ``(M, scale)``: the 2x3 affine matrix and the sampled scale factor.

    Raises:
        ValueError: if the sampled scale is not strictly positive.

    Note:
        Defaults are floats (``10.0``) rather than ints because
        ``get_aug_params`` dispatches on ``isinstance(value, float)``; the
        previous int defaults made a defaults-only call raise ``TypeError``.
    """
    twidth, theight = target_size

    # Rotation and scale about the origin.
    angle = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)

    M = np.ones([2, 3])

    # Shear: convert the sampled angle (degrees) into a slope.
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)

    # Fold the shear into the rotation/scale rows.
    M[0] = R[0] + shear_y * R[1]
    M[1] = R[1] + shear_x * R[0]

    # Translation, proportional to the target size.
    translation_x = get_aug_params(translate) * twidth
    translation_y = get_aug_params(translate) * theight

    M[0, 2] = translation_x
    M[1, 2] = translation_y

    return M, scale
|
|
|
|
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp the xyxy boxes in ``targets[:, :4]`` by the affine matrix ``M``.

    All four corners of every box are transformed, a new axis-aligned box
    is fitted around them, and the result is clipped to the target size.
    ``targets`` is modified in place and also returned.  ``scale`` is
    accepted for interface compatibility but not used here.
    """
    n = len(targets)
    twidth, theight = target_size

    # Homogeneous coordinates for the four corners of every box:
    # (x1,y1), (x2,y2), (x1,y2), (x2,y1).
    corners = np.ones((4 * n, 3))
    corners[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(4 * n, 2)
    corners = (corners @ M.T).reshape(n, 8)

    # Fit an axis-aligned box around the warped corners.
    xs = corners[:, 0::2]
    ys = corners[:, 1::2]
    warped = (
        np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1)))
        .reshape(4, n)
        .T
    )

    # Keep boxes inside the target image.
    warped[:, 0::2] = warped[:, 0::2].clip(0, twidth)
    warped[:, 1::2] = warped[:, 1::2].clip(0, theight)

    targets[:, :4] = warped
    return targets
|
|
|
|
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply a random affine warp to ``img`` and, if present, its boxes.

    A random rotation/scale/shear/translation matrix is sampled via
    ``get_affine_matrix`` and applied to the image (border filled with
    gray 114) and to ``targets`` through ``apply_affine_to_bboxes``.

    Returns the warped image and the (possibly transformed) targets.
    """
    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)

    img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))

    # Only transform boxes when there are any ground-truth targets.
    if len(targets) > 0:
        targets = apply_affine_to_bboxes(targets, target_size, M, scale)

    return img, targets
|
|
|
|
| def _mirror(image, boxes, prob=0.5): |
| _, width, _ = image.shape |
| if random.random() < prob: |
| image = image[:, ::-1] |
| boxes[:, 0::2] = width - boxes[:, 2::-2] |
| return image, boxes |
|
|
|
|
def preproc(img, input_size, swap=(2, 0, 1)):
    """Letterbox-resize ``img`` to ``input_size`` and convert to float32 CHW.

    The image is scaled (aspect ratio preserved) so it fits inside
    ``input_size``, pasted onto a gray (114) canvas at the top-left corner,
    transposed by ``swap``, and returned together with the resize ratio.
    """
    if img.ndim == 3:
        canvas = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
    else:
        canvas = np.full(input_size, 114, dtype=np.uint8)

    # Single ratio keeps the aspect ratio; the smaller fit wins.
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h = int(img.shape[0] * r)
    new_w = int(img.shape[1] * r)
    resized = cv2.resize(
        img, (new_w, new_h), interpolation=cv2.INTER_LINEAR
    ).astype(np.uint8)
    canvas[:new_h, :new_w] = resized

    canvas = np.ascontiguousarray(canvas.transpose(swap), dtype=np.float32)
    return canvas, r
|
|
|
|
class TrainTransform:
    """Training-time transform: HSV jitter, random horizontal flip,
    letterbox resize, and fixed-size label padding.

    Boxes arrive as (x1, y1, x2, y2) in ``targets[:, :4]`` with the class
    label in ``targets[:, 4]``; they leave as (class, cx, cy, w, h) rows in
    a ``(max_labels, 5)`` float32 array.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
        # max_labels: fixed number of label rows returned per image.
        # flip_prob: probability of a horizontal flip (see _mirror).
        # hsv_prob: probability of applying HSV color jitter.
        self.max_labels = max_labels
        self.flip_prob = flip_prob
        self.hsv_prob = hsv_prob

    def __call__(self, image, targets, input_dim):
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        # No ground truth: just letterbox the image and return zero labels.
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, r_o = preproc(image, input_dim)
            return image, targets

        # Keep untouched copies so we can fall back to them if every
        # augmented box ends up degenerate (filtered out below).
        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]

        # NOTE(review): xyxy2cxcywh looks like it converts in place and/or
        # returns the converted view — confirm in utils.yolod before
        # reordering anything here.
        boxes_o = xyxy2cxcywh(boxes_o)

        if random.random() < self.hsv_prob:
            augment_hsv(image)  # mutates image in place
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim)

        # Convert to (cx, cy, w, h) and rescale by the letterbox ratio.
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Drop boxes whose width or height collapsed to ~1 pixel or less.
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]

        # Fallback: if augmentation destroyed all boxes, use the original
        # (letterboxed but otherwise unaugmented) image and labels.
        if len(boxes_t) == 0:
            image_t, r_o = preproc(image_o, input_dim)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o

        labels_t = np.expand_dims(labels_t, 1)

        # Pad/truncate to a fixed (max_labels, 5) block: [cls, cx, cy, w, h].
        targets_t = np.hstack((labels_t, boxes_t))
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        return image_t, padded_labels
|
|
|
|
class PredTransform:
    """Inference-time transform: letterbox resize, optional legacy scaling.

    Returns the preprocessed image and a dummy ``(1, 5)`` label array so
    the call signature matches the other transforms.
    """

    def __init__(self, swap=(2, 0, 1), legacy=False):
        # swap: axis order passed through to preproc (HWC -> CHW by default).
        # legacy: apply BGR->RGB flip plus ImageNet mean/std normalization.
        self.swap = swap
        self.legacy = legacy

    def __call__(self, img, res=None, input_size=None):
        img, _ = preproc(img, input_size, self.swap)
        if self.legacy:
            img = img[::-1, :, :].copy()  # reverse channel order
            # In-place ops keep the float32 dtype produced by preproc.
            img /= 255.0
            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        return img, np.zeros((1, 5))
|
|
|
|
class ValTransform:
    """Validation-time transform: letterbox resize and label padding only.

    Same output contract as TrainTransform — (class, cx, cy, w, h) rows in
    a ``(max_labels, 5)`` float32 array — but with no photometric or
    geometric augmentation.
    """

    def __init__(self, swap=(2, 0, 1), legacy=False, max_labels=50):
        # NOTE(review): swap and legacy are stored but never used by
        # __call__ below (preproc is called with its default swap) —
        # confirm whether that is intended.
        self.max_labels = max_labels
        self.swap = swap
        self.legacy = legacy

    def __call__(self, image, targets, input_dim):
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        # No ground truth: just letterbox the image and return zero labels.
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, r_o = preproc(image, input_dim)
            return image, targets

        image_o = image.copy()
        targets_o = targets.copy()

        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        # NOTE(review): xyxy2cxcywh looks like it converts in place and/or
        # returns the converted view — confirm in utils.yolod.
        boxes_o = xyxy2cxcywh(boxes_o)

        image_t, r_ = preproc(image_o, input_dim)

        # Convert to (cx, cy, w, h) and rescale by the letterbox ratio.
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Drop boxes whose width or height is ~1 pixel or less after resize.
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]

        # Fallback: keep the original boxes if filtering removed them all.
        if len(boxes_t) == 0:
            image_t, r_o = preproc(image_o, input_dim)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o

        labels_t = np.expand_dims(labels_t, 1)

        # Pad/truncate to a fixed (max_labels, 5) block: [cls, cx, cy, w, h].
        targets_t = np.hstack((labels_t, boxes_t))
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        return image_t, padded_labels