# SRPose / datasets / ho3d.py
# (Hosting-page residue kept as a comment: "FrickYinn's picture", "Upload 53 files", "e170a8e verified")
from pathlib import Path
import numpy as np
from PIL import Image
import cv2
import pickle
import json
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, ConcatDataset
from utils.augment import Augmentor
class HO3D(Dataset):
    """Single-frame view of one HO3D sequence.

    Frames are discovered from ``<data_root>/<split>/<sequence_path>/rgb`` and
    matched by file stem with the ``seg``, ``depth`` and ``meta`` folders.
    Frames whose annotation has no camera matrix, or whose object mask covers
    fewer than ``MIN_MASK_PIXELS`` pixels, are dropped at construction time.

    Args:
        data_root: Root directory of the dataset.
        sequence_path: Name of the sequence folder inside the split folder.
        mode: ``'train'`` selects the ``train`` split; any other value selects
            the ``evaluation`` split.
    """

    # Factor applied to the 16-bit value decoded from the depth PNG.
    DEPTH_SCALE = 0.00012498664727900177
    # (width, height) that masks and bounding boxes are rescaled to.
    IMAGE_SIZE = (640, 480)
    # Minimum number of object pixels for a frame to be kept.
    MIN_MASK_PIXELS = 100

    def __init__(self, data_root, sequence_path, mode):
        self.data_root = Path(data_root)
        mode = 'evaluation' if mode != 'train' else 'train'

        self.sequence_dir = self.data_root / mode / sequence_path
        self.color_dir = self.sequence_dir / 'rgb'
        self.mask_dir = self.sequence_dir / 'seg'
        self.depth_dir = self.sequence_dir / 'depth'
        self.meta_dir = self.sequence_dir / 'meta'

        self.color_paths = sorted(self.color_dir.iterdir())
        self.mask_paths = [self.mask_dir / f'{x.stem}.png' for x in self.color_paths]
        self.depth_paths = [self.depth_dir / f'{x.stem}.png' for x in self.color_paths]
        self.meta_paths = [self.meta_dir / f'{x.stem}.pkl' for x in self.color_paths]

        # NOTE(review): an OpenGL->OpenCV camera-axis flip used to be applied to
        # the poses here and is currently disabled; confirm downstream code
        # expects the annotation's native convention.
        (self.intrinsics, self.extrinsics, self.objCorners,
         self.objNames, valid) = self._load_meta(self.meta_paths)

        # First filter: drop frames without usable annotations so that the
        # path arrays line up with the stacked annotation tensors.
        keep = valid.numpy()
        self.color_paths = np.array(self.color_paths)[keep]
        self.mask_paths = np.array(self.mask_paths)[keep]
        self.depth_paths = np.array(self.depth_paths)[keep]
        self.meta_paths = np.array(self.meta_paths)[keep]

        # Second filter: drop frames in which the object is (almost) invisible.
        self.bboxes, valid = self._load_bboxes(self.mask_paths)
        keep = valid.numpy()
        self.intrinsics = self.intrinsics[valid]
        self.extrinsics = self.extrinsics[valid]
        self.objCorners = self.objCorners[valid]
        self.objNames = self.objNames[keep]
        self.color_paths = self.color_paths[keep]
        self.mask_paths = self.mask_paths[keep]
        self.depth_paths = self.depth_paths[keep]
        self.meta_paths = self.meta_paths[keep]

        assert len(self.color_paths) == self.intrinsics.shape[0]
        assert len(self.objNames) == self.extrinsics.shape[0]

        # NOTE: the augmentor is constructed but not applied in __getitem__.
        self.augment = Augmentor(mode == 'train')

    def __len__(self):
        """Return the number of frames that survived both filters."""
        return len(self.color_paths)

    @staticmethod
    def _read_image(path):
        """Read an image with OpenCV, failing loudly if it cannot be read.

        Raises:
            OSError: If ``cv2.imread`` returns ``None`` (missing or
                undecodable file).
        """
        image = cv2.imread(str(path))
        if image is None:
            raise OSError(f'cv2.imread failed to read image: {path}')
        return image

    @classmethod
    def _bbox_from_mask(cls, mask):
        """Compute the object bounding box from a segmentation image.

        Args:
            mask: ``(H, W, 3)`` array; object pixels have channel 1 == 255.

        Returns:
            An int tensor ``[x_min, y_min, x_max, y_max]`` expressed in
            ``IMAGE_SIZE`` coordinates, or ``None`` when fewer than
            ``MIN_MASK_PIXELS`` object pixels are present.
        """
        out_w, out_h = cls.IMAGE_SIZE
        w_scale, h_scale = out_w / mask.shape[1], out_h / mask.shape[0]
        obj_mask = torch.from_numpy(mask[..., 1] == 255)
        if int(obj_mask.sum()) < cls.MIN_MASK_PIXELS:
            return None
        # torch.where returns (row indices, column indices): rows run along
        # the image height (y), columns along the width (x).
        rows, cols = torch.where(obj_mask)
        y_min, y_max = rows.aminmax()
        x_min, x_max = cols.aminmax()
        return torch.stack([
            x_min * w_scale,
            y_min * h_scale,
            x_max * w_scale,
            y_max * h_scale,
        ]).int()

    @classmethod
    def _decode_depth(cls, depth_img):
        """Decode a depth image stored as two 8-bit channels.

        Channel 2 holds the low byte and channel 1 the high byte. The bytes
        are widened to ``uint16`` before being combined, because multiplying a
        ``uint8`` array by 256 overflows (and raises under NumPy 2).

        Returns:
            A float64 array ``(H, W)`` equal to the 16-bit value times
            ``DEPTH_SCALE``.
        """
        low = depth_img[:, :, 2].astype(np.uint16)
        high = depth_img[:, :, 1].astype(np.uint16)
        return (low + high * 256) * cls.DEPTH_SCALE

    def _load_bboxes(self, mask_paths):
        """Compute per-frame bounding boxes and a validity flag per path.

        Returns:
            ``(bboxes, valid)`` where ``bboxes`` stacks the boxes of the valid
            frames only (shape ``(num_valid, 4)``) and ``valid`` is a bool
            tensor with one entry per input path.
        """
        bboxes = []
        valid = []
        for mask_path in mask_paths:
            bbox = self._bbox_from_mask(self._read_image(mask_path))
            valid.append(bbox is not None)
            if bbox is not None:
                bboxes.append(bbox)
        if bboxes:
            bboxes = torch.stack(bboxes)
        else:
            # torch.stack rejects an empty list; keep the (N, 4) layout.
            bboxes = torch.zeros((0, 4), dtype=torch.int32)
        return bboxes, torch.tensor(valid, dtype=torch.bool)

    def _load_meta(self, meta_paths):
        """Load camera intrinsics, object poses, corners and names.

        Frames whose annotation has ``camMat`` set to ``None`` are marked
        invalid and contribute nothing to the stacked outputs.

        Returns:
            ``(intrinsics, extrinsics, objCorners, objNames, valid)``; the
            first four contain valid frames only, ``valid`` is a bool tensor
            with one entry per input path.
        """
        intrinsics = []
        extrinsics = []
        objCorners = []
        objNames = []
        valid = []
        for meta_path in meta_paths:
            # SECURITY NOTE: pickle.load executes arbitrary code from the
            # file; only use annotation files from a trusted source.
            with open(meta_path, 'rb') as f:
                anno = pickle.load(f, encoding='latin1')
            if anno['camMat'] is None:
                valid.append(False)
                continue
            valid.append(True)

            # 4x4 rigid transform built from the axis-angle rotation and the
            # translation stored in the annotation.
            ex = torch.eye(4)
            ex[:3, :3] = torch.from_numpy(cv2.Rodrigues(anno['objRot'])[0])
            ex[:3, 3] = torch.from_numpy(anno['objTrans'])

            # Move the rest-pose corners through that transform.
            corners = torch.from_numpy(anno['objCorners3DRest']).float()
            corners = corners @ ex[:3, :3].T + ex[:3, 3]

            intrinsics.append(torch.from_numpy(anno['camMat']))
            extrinsics.append(ex)
            objCorners.append(corners)
            objNames.append(anno['objName'])

        intrinsics = torch.stack(intrinsics).float()
        extrinsics = torch.stack(extrinsics).float()
        objCorners = torch.stack(objCorners)
        objNames = np.array(objNames)
        valid = torch.tensor(valid, dtype=torch.bool)
        return intrinsics, extrinsics, objCorners, objNames, valid

    def _load_mask(self, mask_path):
        """Return the boolean object mask resized to ``IMAGE_SIZE``."""
        mask = cv2.resize(self._read_image(mask_path), self.IMAGE_SIZE)
        return mask[..., 1] == 255

    def _load_depth(self, depth_path):
        """Read and decode the depth image at ``depth_path``."""
        return self._decode_depth(self._read_image(depth_path))

    def __getitem__(self, idx):
        """Return one frame as a dict.

        Keys: ``color`` (float RGB tensor, channels first, values in [0, 1]),
        ``mask`` (bool tensor), ``depth`` (float64 tensor), ``extrinsic``
        (4x4), ``intrinsic`` (camera matrix), ``objCorners``, ``bbox``
        (``[x_min, y_min, x_max, y_max]``), ``color_path`` (POSIX path of the
        colour image relative to ``data_root``) and ``objName``.
        """
        color = cv2.cvtColor(self._read_image(self.color_paths[idx]), cv2.COLOR_BGR2RGB)
        color = (torch.tensor(color).float() / 255.0).permute(2, 0, 1)

        mask = torch.from_numpy(self._load_mask(self.mask_paths[idx]))
        depth = torch.from_numpy(self._load_depth(self.depth_paths[idx]))

        # Express the path relative to data_root regardless of how many
        # components data_root itself has.
        color_path = Path(self.color_paths[idx]).relative_to(self.data_root).as_posix()

        return {
            'color': color,
            'mask': mask,
            'depth': depth,
            'extrinsic': self.extrinsics[idx],
            'intrinsic': self.intrinsics[idx],
            'objCorners': self.objCorners[idx],
            'bbox': self.bboxes[idx],
            'color_path': color_path,
            'objName': self.objNames[idx],
        }
class HO3DPair(Dataset):
    """Pairs of frames from one HO3D sequence with their relative pose.

    Every unordered pair of frames ``(i, j)`` with ``i < j`` whose relative
    rotation angle is below ``max_angle_error`` becomes one sample. For the
    ``'val'`` and ``'test'`` modes a random subset of at most 1500 pairs is
    kept.

    Args:
        data_root: Root directory of the dataset.
        mode: Split selector, forwarded to :class:`HO3D`.
        sequence_id: Name of the sequence folder.
        max_angle_error: Upper bound (exclusive) on the relative rotation
            angle of a pair, in degrees.
    """

    def __init__(self, data_root, mode, sequence_id, max_angle_error):
        self.ho3d_dataset = HO3D(data_root, sequence_id, mode)

        angle_err = self.get_angle_error(self.ho3d_dataset.extrinsics[:, :3, :3])
        index0, index1 = torch.where(angle_err < max_angle_error)
        # The angle matrix is symmetric; keep each pair once and drop (i, i).
        keep = index0 < index1
        self.index0, self.index1 = index0[keep], index1[keep]
        self.indices = torch.stack([self.index0, self.index1], dim=1)

        if mode == 'val' or mode == 'test':
            self.indices = self.indices[torch.randperm(self.indices.size(0))[:1500]]

    def get_angle_error(self, R):
        """Return the pairwise relative rotation angles in degrees.

        Args:
            R: Rotation matrices of shape ``(B, 3, 3)``.

        Returns:
            A ``(B, B)`` tensor whose entry ``[a, b]`` is the rotation angle
            of ``R[a].T @ R[b]``.
        """
        # trace(R_a^T R_b) is the element-wise dot product of the two
        # matrices, so a single (B, 9) x (9, B) product yields every trace
        # without building the (B, B, 3, 3) tensor of relative rotations.
        flat = R.reshape(R.shape[0], -1)
        trace = flat @ flat.T
        # Clip to guard acos against round-off just outside [-1, 1].
        cosine = torch.clip((trace - 1) / 2, -1, 1)
        return torch.acos(cosine).rad2deg()

    def __len__(self):
        """Return the number of frame pairs."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return the pair at ``idx`` with the pose of frame 1 relative to frame 0.

        The relative transform is ``extrinsic1 @ inverse(extrinsic0)``; its
        rotation and translation parts are returned separately. ``objName``
        is taken from the first frame.
        """
        idx0, idx1 = self.indices[idx].tolist()
        data0, data1 = self.ho3d_dataset[idx0], self.ho3d_dataset[idx1]

        rel_ex = data1['extrinsic'] @ data0['extrinsic'].inverse()

        return {
            'images': torch.stack([data0['color'], data1['color']], dim=0),
            'rotation': rel_ex[:3, :3],
            'translation': rel_ex[:3, 3],
            'intrinsics': torch.stack([data0['intrinsic'], data1['intrinsic']], dim=0),
            'bboxes': torch.stack([data0['bbox'], data1['bbox']]),
            'objCorners': torch.stack([data0['objCorners'], data1['objCorners']]),
            'pair_names': (data0['color_path'], data1['color_path']),
            'objName': data0['objName'],
        }
class HO3DfromJson(Dataset):
    """HO3D frame pairs whose poses and metadata come from a JSON file.

    The JSON file maps the string form of each sample index to a record with
    the keys ``pair_names``, ``rotation``, ``translation``, ``intrinsics``,
    ``bboxes``, ``objCorners`` and ``objName``. ``pair_names`` holds the two
    colour-image paths relative to ``data_root``.

    Args:
        data_root: Root directory of the dataset; must also contain
            ``models/<object>/points.xyz`` for every name in ``obj_names``.
        json_path: Path of the JSON file describing the pairs.
    """

    # Factor applied to the 16-bit value decoded from the depth PNG.
    DEPTH_SCALE = 0.00012498664727900177
    # (width, height) that masks are resized to.
    IMAGE_SIZE = (640, 480)

    def __init__(self, data_root, json_path):
        self.data_root = Path(data_root)
        with open(json_path, 'r') as f:
            self.scene_info = json.load(f)

        self.obj_names = [
            '003_cracker_box',
            '006_mustard_bottle',
            '011_banana',
            '025_mug',
            '037_scissors'
        ]
        self.object_points = {
            obj: np.loadtxt(self.data_root / 'models' / obj / 'points.xyz')
            for obj in self.obj_names
        }

    @staticmethod
    def _read_image(path):
        """Read an image with OpenCV, failing loudly if it cannot be read.

        Raises:
            OSError: If ``cv2.imread`` returns ``None`` (missing or
                undecodable file).
        """
        image = cv2.imread(str(path))
        if image is None:
            raise OSError(f'cv2.imread failed to read image: {path}')
        return image

    @staticmethod
    def _sibling_path(path, folder):
        """Map a colour-image path to the matching PNG in a sibling folder.

        Only the directory that directly contains the image is swapped for
        ``folder``; other path components are left alone, so a dataset root
        that happens to contain the text ``rgb`` is not rewritten.
        """
        path = Path(path)
        return str(path.parent.parent / folder / f'{path.stem}.png')

    @classmethod
    def _decode_depth(cls, depth_img):
        """Decode a depth image stored as two 8-bit channels.

        Channel 2 holds the low byte and channel 1 the high byte. The bytes
        are widened to ``uint16`` before being combined, because multiplying a
        ``uint8`` array by 256 overflows (and raises under NumPy 2).

        Returns:
            A float64 array ``(H, W)`` equal to the 16-bit value times
            ``DEPTH_SCALE``.
        """
        low = depth_img[:, :, 2].astype(np.uint16)
        high = depth_img[:, :, 1].astype(np.uint16)
        return (low + high * 256) * cls.DEPTH_SCALE

    def _load_color(self, path):
        """Read the colour image at ``path`` and return it in RGB order."""
        return cv2.cvtColor(self._read_image(path), cv2.COLOR_BGR2RGB)

    def _load_mask(self, path):
        """Return the boolean object mask for the colour image at ``path``.

        The mask is read from the sibling ``seg`` folder, resized to
        ``IMAGE_SIZE`` and thresholded on channel 1 == 255.
        """
        mask = self._read_image(self._sibling_path(path, 'seg'))
        mask = cv2.resize(mask, self.IMAGE_SIZE)
        return mask[..., 1] == 255

    def _load_depth(self, path):
        """Return the decoded depth map for the colour image at ``path``."""
        return self._decode_depth(self._read_image(self._sibling_path(path, 'depth')))

    def __len__(self):
        """Return the number of pairs listed in the JSON file."""
        return len(self.scene_info)

    def __getitem__(self, idx):
        """Return the pair stored under key ``str(idx)``.

        Images, masks and depths are stacked along a new leading dimension of
        size two (frame 0, frame 1). ``objName`` and ``point_cloud`` refer to
        the first entry of the record's ``objName`` list.
        """
        info = self.scene_info[str(idx)]
        paths = [str(self.data_root / name) for name in info['pair_names'][:2]]

        images = torch.stack([
            (torch.tensor(self._load_color(p)).float() / 255.0).permute(2, 0, 1)
            for p in paths
        ], dim=0)
        masks = torch.stack([torch.from_numpy(self._load_mask(p)) for p in paths], dim=0)
        depths = torch.stack([torch.from_numpy(self._load_depth(p)) for p in paths], dim=0)

        obj_name = info['objName'][0]
        return {
            'images': images,
            'masks': masks,
            'depths': depths,
            'rotation': torch.tensor(info['rotation']).reshape(3, 3),
            'translation': torch.tensor(info['translation']),
            'intrinsics': torch.tensor(info['intrinsics']).reshape(2, 3, 3),
            'bboxes': torch.tensor(info['bboxes']),
            'objCorners': torch.tensor(info['objCorners']),
            'objName': obj_name,
            'point_cloud': self.object_points[obj_name]
        }
def build_ho3d(mode, config):
    """Build the HO3D dataset for the requested mode.

    Args:
        mode: ``'train'``, ``'val'`` or ``'test'``.
        config: Object whose ``DATASET`` attribute provides ``DATA_ROOT``,
            ``MAX_ANGLE_ERROR`` (used for training) and ``JSON_PATH`` (used
            for validation/testing).

    Returns:
        For ``'train'``: a ``ConcatDataset`` with one ``HO3DPair`` per
        sequence directory under ``<DATA_ROOT>/train``, excluding the
        held-out validation sequences. For ``'val'``/``'test'``: an
        ``HO3DfromJson`` built from ``JSON_PATH``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    config = config.DATASET
    data_root = config.DATA_ROOT

    if mode == 'train':
        # Sequences reserved for validation; never used for training.
        val_ids = {'BB14', 'SMu1', 'MC1', 'GSF14', 'SM2', 'SM3', 'SM4', 'SM5', 'MC2', 'MC4', 'MC5', 'MC6'}
        # Sorted for a reproducible dataset order; stray files in the split
        # folder and held-out sequences that are absent are both tolerated.
        seq_id_list = sorted(
            x.name for x in (Path(data_root) / 'train').iterdir()
            if x.is_dir() and x.name not in val_ids
        )
        datasets = [
            HO3DPair(data_root, mode, seq_id, config.MAX_ANGLE_ERROR)
            for seq_id in tqdm(seq_id_list, desc=f'Loading HO3D {mode} dataset')
        ]
        return ConcatDataset(datasets)

    if mode == 'test' or mode == 'val':
        return HO3DfromJson(config.DATA_ROOT, config.JSON_PATH)

    raise ValueError(f"Unsupported mode {mode!r}; expected 'train', 'val' or 'test'")