| |
| import os |
| import cv2 |
| import math |
| import random |
| import numpy as np |
|
|
| def get_mask_boxes(mask): |
| y_coords, x_coords = np.nonzero(mask) |
| x_min = x_coords.min() |
| x_max = x_coords.max() |
| y_min = y_coords.min() |
| y_max = y_coords.max() |
| bbox = np.array([x_min, y_min, x_max, y_max]).astype(np.int32) |
| return bbox |
|
|
|
|
| def get_aug_mask(body_mask, w_len=10, h_len=20): |
| body_bbox = get_mask_boxes(body_mask) |
|
|
| bbox_wh = body_bbox[2:4] - body_bbox[0:2] |
| w_slice = np.int32(bbox_wh[0] / w_len) |
| h_slice = np.int32(bbox_wh[1] / h_len) |
|
|
| for each_w in range(body_bbox[0], body_bbox[2], w_slice): |
| w_start = min(each_w, body_bbox[2]) |
| w_end = min((each_w + w_slice), body_bbox[2]) |
| for each_h in range(body_bbox[1], body_bbox[3], h_slice): |
| h_start = min(each_h, body_bbox[3]) |
| h_end = min((each_h + h_slice), body_bbox[3]) |
| if body_mask[h_start:h_end, w_start:w_end].sum() > 0: |
| body_mask[h_start:h_end, w_start:w_end] = 1 |
|
|
| return body_mask |
|
|
| def get_mask_body_img(img_copy, hand_mask, k=7, iterations=1): |
| kernel = np.ones((k, k), np.uint8) |
| dilation = cv2.dilate(hand_mask, kernel, iterations=iterations) |
| mask_hand_img = img_copy * (1 - dilation[:, :, None]) |
|
|
| return mask_hand_img, dilation |
|
|
|
|
| def get_face_bboxes(kp2ds, scale, image_shape, ratio_aug): |
| h, w = image_shape |
| kp2ds_face = kp2ds.copy()[23:91, :2] |
|
|
| min_x, min_y = np.min(kp2ds_face, axis=0) |
| max_x, max_y = np.max(kp2ds_face, axis=0) |
|
|
|
|
| initial_width = max_x - min_x |
| initial_height = max_y - min_y |
|
|
| initial_area = initial_width * initial_height |
|
|
| expanded_area = initial_area * scale |
|
|
| new_width = np.sqrt(expanded_area * (initial_width / initial_height)) |
| new_height = np.sqrt(expanded_area * (initial_height / initial_width)) |
|
|
| delta_width = (new_width - initial_width) / 2 |
| delta_height = (new_height - initial_height) / 4 |
|
|
| if ratio_aug: |
| if random.random() > 0.5: |
| delta_width += random.uniform(0, initial_width // 10) |
| else: |
| delta_height += random.uniform(0, initial_height // 10) |
|
|
| expanded_min_x = max(min_x - delta_width, 0) |
| expanded_max_x = min(max_x + delta_width, w) |
| expanded_min_y = max(min_y - 3 * delta_height, 0) |
| expanded_max_y = min(max_y + delta_height, h) |
|
|
| return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)] |
|
|
|
|
| def calculate_new_size(orig_w, orig_h, target_area, divisor=64): |
|
|
| target_ratio = orig_w / orig_h |
|
|
| def check_valid(w, h): |
|
|
| if w <= 0 or h <= 0: |
| return False |
| return (w * h <= target_area and |
| w % divisor == 0 and |
| h % divisor == 0) |
|
|
| def get_ratio_diff(w, h): |
|
|
| return abs(w / h - target_ratio) |
|
|
| def round_to_64(value, round_up=False, divisor=64): |
|
|
| if round_up: |
| return divisor * ((value + (divisor - 1)) // divisor) |
| return divisor * (value // divisor) |
|
|
| possible_sizes = [] |
|
|
| max_area_h = int(np.sqrt(target_area / target_ratio)) |
| max_area_w = int(max_area_h * target_ratio) |
|
|
| max_h = round_to_64(max_area_h, round_up=True, divisor=divisor) |
| max_w = round_to_64(max_area_w, round_up=True, divisor=divisor) |
|
|
| for h in range(divisor, max_h + divisor, divisor): |
| ideal_w = h * target_ratio |
|
|
| w_down = round_to_64(ideal_w) |
| w_up = round_to_64(ideal_w, round_up=True) |
|
|
| for w in [w_down, w_up]: |
| if check_valid(w, h, divisor): |
| possible_sizes.append((w, h, get_ratio_diff(w, h))) |
|
|
| if not possible_sizes: |
| raise ValueError("Can not find suitable size") |
|
|
| possible_sizes.sort(key=lambda x: (-x[0] * x[1], x[2])) |
|
|
| best_w, best_h, _ = possible_sizes[0] |
| return int(best_w), int(best_h) |
|
|
|
|
| def resize_by_area(image, target_area, keep_aspect_ratio=True, divisor=64, padding_color=(0, 0, 0)): |
| h, w = image.shape[:2] |
| try: |
| new_w, new_h = calculate_new_size(w, h, target_area, divisor) |
| except: |
| aspect_ratio = w / h |
|
|
| if keep_aspect_ratio: |
| new_h = math.sqrt(target_area / aspect_ratio) |
| new_w = target_area / new_h |
| else: |
| new_w = new_h = math.sqrt(target_area) |
|
|
| new_w, new_h = int((new_w // divisor) * divisor), int((new_h // divisor) * divisor) |
|
|
| interpolation = cv2.INTER_AREA if (new_w * new_h < w * h) else cv2.INTER_LINEAR |
|
|
| resized_image = padding_resize(image, height=new_h, width=new_w, padding_color=padding_color, |
| interpolation=interpolation) |
| return resized_image |
|
|
|
|
| def padding_resize(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR): |
| ori_height = img_ori.shape[0] |
| ori_width = img_ori.shape[1] |
| channel = img_ori.shape[2] |
|
|
| img_pad = np.zeros((height, width, channel), dtype=img_ori.dtype) |
| if channel == 1: |
| img_pad[:, :, 0] = padding_color[0] |
| else: |
| img_pad[:, :, 0] = padding_color[0] |
| img_pad[:, :, 1] = padding_color[1] |
| img_pad[:, :, 2] = padding_color[2] |
|
|
| if (ori_height / ori_width) > (height / width): |
| new_width = int(height / ori_height * ori_width) |
| img = cv2.resize(img_ori, (new_width, height), interpolation=interpolation) |
| padding = int((width - new_width) / 2) |
| if len(img.shape) == 2: |
| img = img[:, :, np.newaxis] |
| img_pad[:, padding: padding + new_width, :] = img |
| else: |
| new_height = int(width / ori_width * ori_height) |
| img = cv2.resize(img_ori, (width, new_height), interpolation=interpolation) |
| padding = int((height - new_height) / 2) |
| if len(img.shape) == 2: |
| img = img[:, :, np.newaxis] |
| img_pad[padding: padding + new_height, :, :] = img |
|
|
| return img_pad |
|
|
| def resize_to_bounds(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR, extra_padding=64, crop_target_image=None): |
| |
| if crop_target_image is not None: |
| ref = crop_target_image |
| if ref.ndim == 2: |
| mask = ref > 0 |
| else: |
| mask = np.any(ref != 0, axis=2) |
| coords = np.argwhere(mask) |
| if coords.size == 0: |
| |
| y0, x0 = 0, 0 |
| y1, x1 = img_ori.shape[0], img_ori.shape[1] |
| else: |
| y0, x0 = coords.min(axis=0) |
| y1, x1 = coords.max(axis=0) + 1 |
| |
| pad_y0 = y0 - extra_padding |
| pad_x0 = x0 - extra_padding |
| pad_y1 = y1 + extra_padding |
| pad_x1 = x1 + extra_padding |
| |
| crop_y0 = max(pad_y0, 0) |
| crop_x0 = max(pad_x0, 0) |
| crop_y1 = min(pad_y1, img_ori.shape[0]) |
| crop_x1 = min(pad_x1, img_ori.shape[1]) |
| crop_img = img_ori[crop_y0:crop_y1, crop_x0:crop_x1] |
| |
| pad_top = crop_y0 - pad_y0 |
| pad_left = crop_x0 - pad_x0 |
| pad_bottom = pad_y1 - crop_y1 |
| pad_right = pad_x1 - crop_x1 |
| if any([pad_top, pad_left, pad_bottom, pad_right]): |
| channel = crop_img.shape[2] if crop_img.ndim == 3 else 1 |
| crop_img = np.pad( |
| crop_img, |
| ((pad_top, pad_bottom), (pad_left, pad_right)) + ((0, 0),) if channel > 1 else ((pad_top, pad_bottom), (pad_left, pad_right)), |
| mode='constant', constant_values=0 |
| ) |
| else: |
| if img_ori.ndim == 2: |
| mask = img_ori > 0 |
| else: |
| mask = np.any(img_ori != 0, axis=2) |
| coords = np.argwhere(mask) |
| if coords.size == 0: |
| |
| crop_img = img_ori |
| else: |
| y0, x0 = coords.min(axis=0) |
| y1, x1 = coords.max(axis=0) + 1 |
| pad_y0 = y0 - extra_padding |
| pad_x0 = x0 - extra_padding |
| pad_y1 = y1 + extra_padding |
| pad_x1 = x1 + extra_padding |
| crop_y0 = max(pad_y0, 0) |
| crop_x0 = max(pad_x0, 0) |
| crop_y1 = min(pad_y1, img_ori.shape[0]) |
| crop_x1 = min(pad_x1, img_ori.shape[1]) |
| crop_img = img_ori[crop_y0:crop_y1, crop_x0:crop_x1] |
| pad_top = crop_y0 - pad_y0 |
| pad_left = crop_x0 - pad_x0 |
| pad_bottom = pad_y1 - crop_y1 |
| pad_right = pad_x1 - crop_x1 |
| if any([pad_top, pad_left, pad_bottom, pad_right]): |
| channel = crop_img.shape[2] if crop_img.ndim == 3 else 1 |
| crop_img = np.pad( |
| crop_img, |
| ((pad_top, pad_bottom), (pad_left, pad_right)) + ((0, 0),) if channel > 1 else ((pad_top, pad_bottom), (pad_left, pad_right)), |
| mode='constant', constant_values=0 |
| ) |
|
|
| ori_height = crop_img.shape[0] |
| ori_width = crop_img.shape[1] |
| channel = crop_img.shape[2] if crop_img.ndim == 3 else 1 |
|
|
| img_pad = np.zeros((height, width, channel), dtype=crop_img.dtype) |
| if channel == 1: |
| img_pad[:, :, 0] = padding_color[0] |
| else: |
| for c in range(channel): |
| img_pad[:, :, c] = padding_color[c % len(padding_color)] |
|
|
| |
| crop_aspect = ori_width / ori_height |
| target_aspect = width / height |
| if crop_aspect > target_aspect: |
| new_width = width |
| new_height = int(width / crop_aspect) |
| else: |
| new_height = height |
| new_width = int(height * crop_aspect) |
| img = cv2.resize(crop_img, (new_width, new_height), interpolation=interpolation) |
| if img.ndim == 2: |
| img = img[:, :, np.newaxis] |
| y_pad = (height - new_height) // 2 |
| x_pad = (width - new_width) // 2 |
| img_pad[y_pad:y_pad + new_height, x_pad:x_pad + new_width, :] = img |
|
|
| return img_pad |
|
|
|
|
| def get_frame_indices(frame_num, video_fps, clip_length, train_fps): |
|
|
| start_frame = 0 |
| times = np.arange(0, clip_length) / train_fps |
| frame_indices = start_frame + np.round(times * video_fps).astype(int) |
| frame_indices = np.clip(frame_indices, 0, frame_num - 1) |
|
|
| return frame_indices.tolist() |
|
|
|
|
| def get_face_bboxes(kp2ds, scale, image_shape): |
| h, w = image_shape |
| kp2ds_face = kp2ds.copy()[1:] * (w, h) |
|
|
| min_x, min_y = np.min(kp2ds_face, axis=0) |
| max_x, max_y = np.max(kp2ds_face, axis=0) |
|
|
| initial_width = max_x - min_x |
| initial_height = max_y - min_y |
|
|
| initial_area = initial_width * initial_height |
|
|
| expanded_area = initial_area * scale |
|
|
| new_width = np.sqrt(expanded_area * (initial_width / initial_height)) |
| new_height = np.sqrt(expanded_area * (initial_height / initial_width)) |
|
|
| delta_width = (new_width - initial_width) / 2 |
| delta_height = (new_height - initial_height) / 4 |
|
|
| expanded_min_x = max(min_x - delta_width, 0) |
| expanded_max_x = min(max_x + delta_width, w) |
| expanded_min_y = max(min_y - 3 * delta_height, 0) |
| expanded_max_y = min(max_y + delta_height, h) |
|
|
| return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)] |