File size: 4,283 Bytes
355b5d6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import os
import random
import numpy as np
from PIL import Image, ImageOps
from PIL.ImageFilter import GaussianBlur
import cv2
import torch
import json
from torch.utils.data import Dataset
import torchvision.transforms as transforms
def crop_image(img, rect):
x, y, w, h = rect
left = abs(x) if x < 0 else 0
top = abs(y) if y < 0 else 0
right = abs(img.shape[1]-(x+w)) if x + w >= img.shape[1] else 0
bottom = abs(img.shape[0]-(y+h)) if y + h >= img.shape[0] else 0
if img.shape[2] == 4:
color = [0, 0, 0, 0]
else:
color = [0, 0, 0]
new_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
x = x + left
y = y + top
return new_img[y:(y+h),x:(x+w),:]
class EvalDataset(Dataset):
@staticmethod
def modify_commandline_options(parser, is_train):
return parser
def __init__(self, opt, projection='orthogonal'):
self.opt = opt
self.projection_mode = projection
self.root = self.opt.dataroot
self.img_files = sorted([os.path.join(self.root,f) for f in os.listdir(self.root) if f.split('.')[-1] in ['png', 'jpeg', 'jpg', 'PNG', 'JPG', 'JPEG'] and os.path.exists(os.path.join(self.root,f.replace('.%s' % (f.split('.')[-1]), '_rect.txt')))])
self.IMG = os.path.join(self.root)
self.phase = 'val'
self.load_size = self.opt.loadSize
# PIL to tensor
self.to_tensor = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# only used in case of multi-person processing
self.person_id = 0
def __len__(self):
return len(self.img_files)
def get_n_person(self, index):
rect_path = self.img_files[index].replace('.%s' % (self.img_files[index].split('.')[-1]), '_rect.txt')
rects = np.loadtxt(rect_path, dtype=np.int32)
return rects.shape[0] if len(rects.shape) == 2 else 1
def get_item(self, index):
img_path = self.img_files[index]
rect_path = self.img_files[index].replace('.%s' % (self.img_files[index].split('.')[-1]), '_rect.txt')
# Name
img_name = os.path.splitext(os.path.basename(img_path))[0]
im = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
if im.shape[2] == 4:
im = im / 255.0
im[:,:,:3] /= im[:,:,3:] + 1e-8
im = im[:,:,3:] * im[:,:,:3] + 0.5 * (1.0 - im[:,:,3:])
im = (255.0 * im).astype(np.uint8)
h, w = im.shape[:2]
intrinsic = np.identity(4)
trans_mat = np.identity(4)
rects = np.loadtxt(rect_path, dtype=np.int32)
if len(rects.shape) == 1:
rects = rects[None]
pid = min(rects.shape[0]-1, self.person_id)
rect = rects[pid].tolist()
im = crop_image(im, rect)
scale_im2ndc = 1.0 / float(w // 2)
scale = w / rect[2]
trans_mat *= scale
trans_mat[3,3] = 1.0
trans_mat[0, 3] = -scale*(rect[0] + rect[2]//2 - w//2) * scale_im2ndc
trans_mat[1, 3] = scale*(rect[1] + rect[3]//2 - h//2) * scale_im2ndc
intrinsic = np.matmul(trans_mat, intrinsic)
im_512 = cv2.resize(im, (512, 512))
im = cv2.resize(im, (self.load_size, self.load_size))
image_512 = Image.fromarray(im_512[:,:,::-1]).convert('RGB')
image = Image.fromarray(im[:,:,::-1]).convert('RGB')
B_MIN = np.array([-1, -1, -1])
B_MAX = np.array([1, 1, 1])
projection_matrix = np.identity(4)
projection_matrix[1, 1] = -1
calib = torch.Tensor(projection_matrix).float()
calib_world = torch.Tensor(intrinsic).float()
# image
image_512 = self.to_tensor(image_512)
image = self.to_tensor(image)
return {
'name': img_name,
'img': image.unsqueeze(0),
'img_512': image_512.unsqueeze(0),
'calib': calib.unsqueeze(0),
'calib_world': calib_world.unsqueeze(0),
'b_min': B_MIN,
'b_max': B_MAX,
}
def __getitem__(self, index):
return self.get_item(index)
|