# -*- coding: utf-8 -*-

"""
Generic Image Transform utilities.
"""

import math
import random
from collections.abc import Iterable

import cv2
import numpy as np
import torch.nn.functional as F
from torch import rand
from torch.autograd import Variable


def _as_hw(size):
    """Validate ``size`` and return it as a ``(height, width)`` pair.

    Args:
        size: Either a single int (used for both sides) or an iterable of
            exactly two values ``(height, width)``.

    Returns:
        Tuple ``(height, width)``.

    Raises:
        TypeError: If ``size`` is neither an int nor an iterable.
        ValueError: If ``size`` is an iterable that does not hold two items.
    """
    if not isinstance(size, (int, Iterable)):
        raise TypeError('Got inappropriate size arg: {}'.format(size))
    if isinstance(size, int):
        # A bare int passed the type check in the original code but then
        # crashed on tuple unpacking; treat it as a square size instead.
        return size, size
    height, width = size
    return height, width


class ResizePad:
    """
    Resize and pad an image to given size.

    The image is scaled (aspect ratio preserved) so that it fits inside the
    target ``(height, width)`` and is then centred on a zero-filled canvas.
    """

    def __init__(self, size):
        """Store the target size.

        Args:
            size: Target ``(height, width)``, or a single int for a square.

        Raises:
            TypeError: If ``size`` is neither an int nor an iterable.
        """
        self.h, self.w = _as_hw(size)

    def __call__(self, img):
        """Resize ``img`` to fit the target size and zero-pad the rest.

        Args:
            img: NumPy image indexed as ``[row, col]`` or
                ``[row, col, channel]``.

        Returns:
            A new array of shape ``(self.h, self.w, C)``; ``C`` is 1 for a
            2-D input.
        """
        h, w = img.shape[:2]
        scale = min(self.h / h, self.w / w)
        # Clamp to 1 px so extreme aspect ratios do not ask cv2 for a
        # zero-sized output.
        resized_h = max(1, int(np.round(h * scale)))
        resized_w = max(1, int(np.round(w * scale)))
        pad_h = (self.h - resized_h) // 2
        pad_w = (self.w - resized_w) // 2

        # cv2.resize expects dsize as (width, height).
        resized_img = cv2.resize(img, (resized_w, resized_h))
        if resized_img.ndim == 2:
            # Covers 2-D inputs and (H, W, 1) inputs, for which cv2.resize
            # returns a 2-D array.
            resized_img = resized_img[..., np.newaxis]

        new_img = np.zeros(
            (self.h, self.w, resized_img.shape[-1]), dtype=resized_img.dtype)
        new_img[pad_h: pad_h + resized_h,
                pad_w: pad_w + resized_w, ...] = resized_img
        return new_img


class CropResize:
    """Remove padding and resize image to its original size."""

    def __call__(self, img, size):
        """Upscale ``img`` to cover ``size`` and centre-crop to ``size``.

        Args:
            img: 2-D torch tensor ``(rows, cols)``.
            size: Output ``(height, width)``, or a single int for a square.

        Returns:
            2-D tensor of shape ``(height, width)``.

        Raises:
            TypeError: If ``size`` is neither an int nor an iterable.
        """
        input_h, input_w = _as_hw(size)
        im_h, im_w = img.shape[:2]
        # Use the larger ratio so the resized map covers the target on both
        # axes; the surplus on the other axis is cropped away.
        scale = max(input_h / im_h, input_w / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        crop_h = (resized_h - input_h) // 2
        crop_w = (resized_w - input_w) // 2

        # F.interpolate(..., align_corners=False) is what the deprecated
        # F.upsample(mode='bilinear') resolved to.
        resized_img = F.interpolate(
            img.unsqueeze(0).unsqueeze(0),
            size=(resized_h, resized_w),
            mode='bilinear',
            align_corners=False)
        # Index the batch/channel axes explicitly; a bare squeeze() would
        # also drop a spatial axis of length 1.
        resized_img = resized_img[0, 0]
        return resized_img[crop_h: crop_h + input_h,
                           crop_w: crop_w + input_w]


class ResizeImage:
    """Resize the largest of the sides of the image to a given size"""

    def __init__(self, size):
        """Store the target length of the longer side.

        Args:
            size: Target length. ``__call__`` divides it by the image
                sides, so it must behave like a number even though the
                type check also lets iterables through.

        Raises:
            TypeError: If ``size`` is neither an int nor an iterable.
        """
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        self.size = size

    def __call__(self, img):
        """Bilinearly resize ``img`` so that its longer side equals ``size``.

        Args:
            img: 3-D torch tensor whose last two axes are ``(rows, cols)``.

        Returns:
            Detached tensor with all length-1 axes squeezed out, so a
            single-channel input comes back 2-D.
        """
        im_h, im_w = img.shape[-2:]
        scale = min(self.size / im_h, self.size / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        # detach() replaces the legacy Variable(...) / .data round trip.
        out = F.interpolate(
            img.detach().unsqueeze(0),
            size=(resized_h, resized_w),
            mode='bilinear',
            align_corners=False).squeeze()
        return out


class ResizeAnnotation:
    """Resize the largest of the sides of the annotation to a given size"""

    def __init__(self, size):
        """Store the target length of the longer side.

        Args:
            size: Target length. ``__call__`` divides it by the annotation
                sides, so it must behave like a number even though the
                type check also lets iterables through.

        Raises:
            TypeError: If ``size`` is neither an int nor an iterable.
        """
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        self.size = size

    def __call__(self, img):
        """Bilinearly resize a 2-D annotation map.

        Args:
            img: 2-D torch tensor ``(rows, cols)``.

        Returns:
            Detached 2-D tensor whose longer side equals ``self.size``.
        """
        im_h, im_w = img.shape[-2:]
        scale = min(self.size / im_h, self.size / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        # Index batch/channel explicitly so a spatial axis of length 1 is
        # not squeezed away together with them.
        out = F.interpolate(
            img.detach().unsqueeze(0).unsqueeze(0),
            size=(resized_h, resized_w),
            mode='bilinear',
            align_corners=False)[0, 0]
        return out


class ToNumpy:
    """Transform an torch.*Tensor to an numpy ndarray."""

    def __call__(self, x):
        """Return ``x.numpy()``."""
        return x.numpy()


def letterbox(img, mask, height, color=(123.7, 116.3, 103.5)):
    """Resize a rectangular image to a padded square.

    Args:
        img: NumPy image indexed as ``[row, col, ...]``.
        mask: Optional mask with the same spatial size as ``img``; it is
            resized with nearest-neighbour interpolation and padded with 0.
            May be ``None``.
        height: Side length of the square output.
        color: Border fill value for ``img``.

    Returns:
        Tuple ``(img, mask, ratio, dw, dh)`` where ``ratio`` is the applied
        scale factor (new / old) and ``dw`` / ``dh`` are the (possibly
        fractional) per-side paddings along width / height.
    """
    shape = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(shape)  # ratio = new / old
    # cv2 sizes are (width, height).
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
    dw = (height - new_shape[0]) / 2  # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    # The +/-0.1 nudges push a x.5 padding to the smaller side first and the
    # larger side second, so the two sides always sum to the total padding.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)  # padded square
    if mask is not None:
        mask = cv2.resize(mask, new_shape, interpolation=cv2.INTER_NEAREST)  # resized, no border
        mask = cv2.copyMakeBorder(mask, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=0)  # padded square
    return img, mask, ratio, dw, dh


def random_affine(img, mask, targets, degrees=(-10, 10), translate=(.1, .1),
                  scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(123.7, 116.3, 103.5), all_bbox=None):
    """Apply one random rotation/scale/translation/shear to image and boxes.

    Args:
        img: NumPy image indexed as ``[row, col, ...]``.
        mask: Optional mask warped with the same matrix (nearest-neighbour,
            border 0). May be ``None``.
        targets: A single box ``[x1, y1, x2, y2]``, a ``list`` of such
            boxes, or ``None``.
        degrees: ``(min, max)`` rotation angle in degrees.
        translate: ``(x, y)`` maximum translation as a fraction of the image
            width / height.
        scale: ``(min, max)`` isotropic scale factor.
        shear: ``(min, max)`` shear angle in degrees, drawn independently
            for x and y.
        borderValue: Fill value for image areas uncovered by the warp.
        all_bbox: Optional 2-D array of extra boxes (one per row). It is
            overwritten IN PLACE with the warped boxes.

    Returns:
        The shape of the result depends on the arguments:

        * ``targets`` is a ``list``: ``(img, mask, [boxes...], M)``
        * ``all_bbox`` is given: ``(img, mask, box, all_bbox, M)``
        * ``targets`` is not ``None``: ``(img, mask, box, M)``
        * otherwise: the warped image only.

        The warped image/mask are square with side
        ``max(img.shape[0], img.shape[1])``; ``M`` is the 3x3 matrix used.
    """
    border = 0  # width of added border (optional)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(
        angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation: x is measured along the width (shape[1]) and y along the
    # height (shape[0]). The original code had the two extents swapped,
    # which only went unnoticed on square inputs.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height),
                              flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue
    if mask is not None:
        maskw = cv2.warpPerspective(mask, M, dsize=(height, height),
                                    flags=cv2.INTER_NEAREST, borderValue=0)
    else:
        maskw = None

    # Return warped points also
    if isinstance(targets, list):
        warped = [wrap_points(bbox, M, height, a) for bbox in targets]
        return imw, maskw, warped, M
    if all_bbox is not None:
        targets = wrap_points(targets, M, height, a)
        for ii in range(all_bbox.shape[0]):
            all_bbox[ii, :] = wrap_points(all_bbox[ii, :], M, height, a)
        return imw, maskw, targets, all_bbox, M
    if targets is not None:
        targets = wrap_points(targets, M, height, a)
        return imw, maskw, targets, M
    return imw


def wrap_points(targets, M, height, a):
    """Warp one axis-aligned box with ``M`` and return its new extent.

    Args:
        targets: Box ``[x1, y1, x2, y2]`` (array or sequence); only the
            first four entries are read.
        M: 3x3 transform matrix applied to the box corners.
        height: Upper clipping bound for every coordinate (lower bound 0).
        a: Rotation angle in degrees, used to shrink the enclosing box.

    Returns:
        Float array ``[x1, y1, x2, y2]`` of the warped, shrunk and clipped
        box. No size/aspect filtering is applied to the result.
    """
    # asarray lets plain lists/tuples through (the original required an
    # ndarray because of fancy indexing) and leaves the input untouched.
    box = np.asarray(targets, dtype=np.float64)
    x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

    # warp the four corners: x1y1, x2y2, x1y2, x2y1 (homogeneous coords)
    corners = np.array([
        [x1, y1, 1.0],
        [x2, y2, 1.0],
        [x1, y2, 1.0],
        [x2, y1, 1.0],
    ])
    warped = (corners @ M.T)[:, :2]

    # axis-aligned box enclosing the warped corners
    x_min, y_min = warped.min(axis=0)
    x_max, y_max = warped.max(axis=0)

    # apply angle-based reduction: the enclosing box of a rotated rectangle
    # overestimates the object, so shrink it around its centre
    radians = a * math.pi / 180
    reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
    cx = (x_max + x_min) / 2
    cy = (y_max + y_min) / 2
    w = (x_max - x_min) * reduction
    h = (y_max - y_min) * reduction
    out = np.array([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])

    # clip warped points to the image
    np.clip(out, 0, height, out=out)
    return out


def random_crop(img, seg, pad, h, w):
    """Randomly shift the image by padding it and cropping an ``h x w`` window.

    With probability 0.5 the inputs are returned untouched. Otherwise both
    arrays are padded by ``pad`` pixels on every side and up to 100 random
    windows are tried; the first one that keeps more than 95% of the mask
    mass is returned.

    Args:
        img: NumPy image indexed as ``[row, col, channel]``.
        seg: Mask with the same spatial size as ``img``.
        pad: Padding (pixels) added on each side before cropping.
        h: Crop height.
        w: Crop width.

    Returns:
        Tuple ``(img, seg)``. If the mask is empty or no window qualifies,
        the ORIGINAL (unpadded) inputs are returned; the previous behaviour
        of returning the padded arrays produced an output of a different
        size from every other path.
    """
    if random.random() < 0.5:
        return img, seg

    padded_img = cv2.copyMakeBorder(img, pad, pad, pad, pad,
                                    cv2.BORDER_CONSTANT,
                                    value=(123.7, 116.3, 103.5))
    padded_seg = cv2.copyMakeBorder(seg, pad, pad, pad, pad,
                                    cv2.BORDER_CONSTANT, value=(0, 0, 0))

    seg_pixel = padded_seg.sum()
    if seg_pixel <= 0:
        # Empty mask: nothing to preserve and the ratio below would divide
        # by zero.
        return img, seg

    for _ in range(100):
        left = random.randint(0, pad * 2)
        top = random.randint(0, pad * 2)
        window = padded_seg[top: top + h, left: left + w]
        if window.sum() / seg_pixel > 0.95:
            return padded_img[top: top + h, left: left + w, :], window
    return img, seg


def random_copy(img, seg, phrase, bbox):
    """Paste a copy of the masked object to its left or right.

    The copy becomes the new referred object: the returned mask marks the
    pasted instance and ``' on left'`` / ``' on right'`` is appended to the
    phrase. The function does nothing when the phrase already contains a
    positional word, with probability 0.75, when the mask is empty, or when
    there is not a full object width of free space on both sides.

    Args:
        img: NumPy image indexed as ``[row, col, ...]``. MODIFIED IN PLACE
            when a copy is pasted.
        seg: Mask with the same spatial size as ``img``.
        phrase: Referring expression (str).
        bbox: Passed through unchanged.
            NOTE(review): after a paste it still describes the source
            object, not the copy; the update was disabled in the original
            code, so confirm what callers expect.

    Returns:
        Tuple ``(img, seg, phrase, bbox)``.
    """
    positional_words = ('left', 'right', 'center', 'middle', 'front', 'back')
    if any(word in phrase for word in positional_words):
        return img, seg, phrase, bbox
    if random.random() < 0.75:
        return img, seg, phrase, bbox

    h, w = img.shape[0], img.shape[1]
    x, y, bboxw, bboxh = cv2.boundingRect(seg.astype(np.uint8))
    if bboxw == 0 or bboxh == 0:
        # Empty mask: nothing to copy, so leave the phrase alone as well.
        return img, seg, phrase, bbox
    x1, y1 = x, y
    x2, y2 = x + bboxw, y + bboxh

    # Require room for a full copy on BOTH sides of the object.
    if x1 - bboxw < 0 or w - bboxw < x2:
        return img, seg, phrase, bbox

    if random.random() < 0.5:
        new_x1 = random.randint(0, x1 - bboxw)
        phrase += ' on left'
    else:
        new_x1 = random.randint(x2, w - bboxw)
        phrase += ' on right'
    new_x2 = new_x1 + bboxw

    # Vertical jitter of at most one object height, restricted up front to
    # offsets that keep the copy inside the image (the original re-drew
    # until it hit one). Zero is always valid, so the range is never empty.
    delta_y = random.randint(max(-bboxh, -y1), min(bboxh, h - y2))
    new_y1 = y1 + delta_y
    new_y2 = y2 + delta_y

    new_seg = np.zeros_like(seg)
    new_seg[new_y1: new_y2, new_x1: new_x2] = seg[y1: y2, x1: x2]

    # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
    img[new_seg.astype(bool)] = img[seg.astype(bool)]
    seg = new_seg
    return img, seg, phrase, bbox


def random_erase(img, seg):
    """Randomly blank out a rectangle inside the object's bounding box.

    With probability 0.5 the inputs are returned untouched. Otherwise up to
    100 rectangles are sampled with an area of 2%-40% of HALF the mask's
    bounding-box area and an aspect ratio in ``[0.3, 1/0.3]``. The first
    accepted rectangle is zeroed in ``seg`` and filled with
    ``(123.7, 116.3, 103.5)`` in ``img``.

    Args:
        img: NumPy image indexed as ``[row, col, channel]`` with at least
            three channels. MODIFIED IN PLACE.
            NOTE(review): on an integer image the fractional fill values
            are truncated by the assignment.
        seg: Mask with the same spatial size as ``img``. MODIFIED IN PLACE.

    Returns:
        Tuple ``(img, seg)``.
    """
    if random.random() < 0.5:
        return img, seg

    x, y, bboxw, bboxh = cv2.boundingRect(seg.astype(np.uint8))
    seg_total = seg.sum()
    if bboxw == 0 or bboxh == 0 or seg_total == 0:
        # Empty mask: no region to erase and the ratio below would divide
        # by zero.
        return img, seg

    area = bboxw * bboxh * 0.5
    for _ in range(100):
        # Scale the sampled fraction by the reference area. The original
        # used the bare fraction as a pixel area, so the patch was at most
        # one pixel and the function never erased anything.
        target_area = random.uniform(0.02, 0.4) * area
        aspect_ratio = random.uniform(0.3, 1 / 0.3)
        h = int(round(math.sqrt(target_area * aspect_ratio)))
        w = int(round(math.sqrt(target_area / aspect_ratio)))
        if not (w < bboxw and h < bboxh):
            continue

        x1 = random.randint(0, bboxw - w)
        y1 = random.randint(0, bboxh - h)
        rows = slice(y + y1, y + y1 + h)
        cols = slice(x + x1, x + x1 + w)

        remaining = seg_total - seg[rows, cols].sum()
        # NOTE(review): as written this rejects patches that leave MORE
        # than 75% of the mask, i.e. it insists on erasing at least a
        # quarter of the object. Kept as in the original; confirm that the
        # comparison is not inverted.
        if remaining / seg_total > 0.75:
            continue

        seg[rows, cols] = 0
        img[rows, cols, 0] = 123.7
        img[rows, cols, 1] = 116.3
        img[rows, cols, 2] = 103.5
        return img, seg
    return img, seg