| from pathlib import Path
|
| import numpy as np
|
| from PIL import Image
|
| import cv2
|
| import pickle
|
| import json
|
| from tqdm import tqdm
|
| import torch
|
| from torch.utils.data import Dataset, ConcatDataset
|
|
|
| from utils.augment import Augmentor
|
|
|
|
|
class HO3D(Dataset):
    """Single-frame dataset over one HO3D sequence directory.

    The sequence is read from ``<data_root>/<split>/<sequence_path>`` and is
    expected to contain ``rgb``, ``seg``, ``depth`` and ``meta`` sub-folders
    whose files share the same stem.  Frames whose meta file has no ``camMat``
    or whose object mask covers fewer than ``MIN_MASK_PIXELS`` pixels are
    dropped at construction time.

    Args:
        data_root: Root directory of the dataset.
        sequence_path: Name of the sequence folder inside the split folder.
        mode: ``'train'`` reads the ``train`` split; any other value reads the
            ``evaluation`` split.
    """

    # Factor applied to the raw 16-bit depth value decoded from the depth PNG
    # (presumably yields metres -- confirm against the HO3D toolkit).
    DEPTH_SCALE = 0.00012498664727900177
    # (width, height) that masks are resized to and bounding boxes refer to.
    TARGET_SIZE = (640, 480)
    # Frames with fewer object-mask pixels than this are discarded.
    MIN_MASK_PIXELS = 100

    def __init__(self, data_root, sequence_path, mode):
        self.data_root = Path(data_root)
        # Everything that is not 'train' lives in the 'evaluation' folder.
        mode = 'evaluation' if mode != 'train' else 'train'
        self.sequence_dir = self.data_root / mode / sequence_path

        self.color_dir = self.sequence_dir / 'rgb'
        self.mask_dir = self.sequence_dir / 'seg'
        self.depth_dir = self.sequence_dir / 'depth'
        self.meta_dir = self.sequence_dir / 'meta'

        self.color_paths = sorted(self.color_dir.iterdir())
        self.mask_paths = [self.mask_dir / f'{x.stem}.png' for x in self.color_paths]
        self.depth_paths = [self.depth_dir / f'{x.stem}.png' for x in self.color_paths]
        self.meta_paths = [self.meta_dir / f'{x.stem}.pkl' for x in self.color_paths]

        # First filter: frames with usable camera annotations.  The returned
        # tensors already contain only the valid frames, so only the path
        # lists need to be masked here.
        (self.intrinsics, self.extrinsics, self.objCorners,
         self.objNames, valid) = self._load_meta(self.meta_paths)
        keep = valid.numpy()
        self.color_paths = np.array(self.color_paths, dtype=object)[keep]
        self.mask_paths = np.array(self.mask_paths, dtype=object)[keep]
        self.depth_paths = np.array(self.depth_paths, dtype=object)[keep]
        self.meta_paths = np.array(self.meta_paths, dtype=object)[keep]

        # Second filter: frames whose object mask is large enough.
        self.bboxes, valid = self._load_bboxes(self.mask_paths)
        keep = valid.numpy()
        self.intrinsics = self.intrinsics[valid]
        self.extrinsics = self.extrinsics[valid]
        self.objCorners = self.objCorners[valid]
        self.objNames = self.objNames[keep]
        self.color_paths = self.color_paths[keep]
        self.mask_paths = self.mask_paths[keep]
        self.depth_paths = self.depth_paths[keep]
        self.meta_paths = self.meta_paths[keep]

        assert len(self.color_paths) == self.intrinsics.shape[0]
        assert len(self.objNames) == self.extrinsics.shape[0]

        self.augment = Augmentor(mode == 'train')

    def __len__(self):
        """Return the number of frames that survived both filters."""
        return len(self.color_paths)

    @staticmethod
    def _imread(path):
        """Read an image with OpenCV, failing loudly if it cannot be read.

        Raises:
            FileNotFoundError: if ``cv2.imread`` returns ``None``.
        """
        image = cv2.imread(str(path))
        if image is None:
            raise FileNotFoundError(f'Could not read image: {path}')
        return image

    @staticmethod
    def _bbox_from_mask(obj_mask, w_scale, h_scale):
        """Return the scaled ``[x1, y1, x2, y2]`` box enclosing ``obj_mask``.

        Args:
            obj_mask: 2-D boolean tensor indexed as ``[row, column]`` with at
                least one ``True`` entry.
            w_scale: Factor applied to column (x) coordinates.
            h_scale: Factor applied to row (y) coordinates.

        Returns:
            ``int32`` tensor of shape ``(4,)``.
        """
        rows, cols = torch.where(obj_mask)
        row_min, row_max = rows.aminmax()
        col_min, col_max = cols.aminmax()
        # Columns are x and must use the width scale; rows are y and must use
        # the height scale.
        return torch.stack([
            col_min * w_scale,
            row_min * h_scale,
            col_max * w_scale,
            row_max * h_scale,
        ]).int()

    @classmethod
    def _decode_depth(cls, depth_img):
        """Decode a 3-channel 8-bit depth image into a float64 depth map.

        Channel 2 holds the low byte and channel 1 the high byte of a 16-bit
        value, which is then multiplied by ``DEPTH_SCALE``.
        """
        # Widen before multiplying: ``uint8_array * 256`` raises OverflowError
        # on NumPy >= 2 because 256 does not fit into uint8.
        low = depth_img[:, :, 2].astype(np.uint16)
        high = depth_img[:, :, 1].astype(np.uint16)
        return (low + high * 256) * cls.DEPTH_SCALE

    def _load_bboxes(self, mask_paths):
        """Compute one bounding box per mask and flag masks that are too small.

        Args:
            mask_paths: Iterable of segmentation image paths.

        Returns:
            Tuple ``(bboxes, valid)``: ``bboxes`` is an ``int32`` tensor of
            shape ``(num_valid, 4)`` expressed in ``TARGET_SIZE`` coordinates;
            ``valid`` is a boolean tensor with one entry per input path.
        """
        target_w, target_h = self.TARGET_SIZE
        bboxes = []
        valid = []
        for mask_path in mask_paths:
            mask = self._imread(mask_path)
            w_scale = target_w / mask.shape[1]
            h_scale = target_h / mask.shape[0]
            # Pixels whose green channel is saturated are used as object mask.
            obj_mask = torch.from_numpy(mask[..., 1] == 255)

            if obj_mask.float().sum() < self.MIN_MASK_PIXELS:
                valid.append(False)
                continue
            valid.append(True)
            bboxes.append(self._bbox_from_mask(obj_mask, w_scale, h_scale))

        # torch.stack rejects an empty list, so build the empty result by hand.
        if bboxes:
            bboxes = torch.stack(bboxes)
        else:
            bboxes = torch.zeros((0, 4), dtype=torch.int32)
        valid = torch.tensor(valid, dtype=torch.bool)

        return bboxes, valid

    def _load_meta(self, meta_paths):
        """Load camera and object annotations from the per-frame pickles.

        Frames whose ``camMat`` entry is ``None`` are marked invalid and
        contribute nothing to the returned tensors.

        Returns:
            Tuple ``(intrinsics, extrinsics, objCorners, objNames, valid)``.
            ``extrinsics`` are 4x4 matrices built from ``objRot`` (Rodrigues
            vector) and ``objTrans``; ``objCorners`` are ``objCorners3DRest``
            transformed by that pose; ``valid`` is a boolean tensor with one
            entry per input path.
        """
        intrinsics = []
        extrinsics = []
        objCorners = []
        objNames = []
        valid = []
        for meta_path in meta_paths:
            # NOTE(security): pickle executes arbitrary code on load; only use
            # this on annotation files from a trusted source.
            with open(meta_path, 'rb') as f:
                anno = pickle.load(f, encoding='latin1')

            if anno['camMat'] is None:
                valid.append(False)
                continue
            valid.append(True)

            camMat = torch.from_numpy(anno['camMat'])
            ex = torch.eye(4)
            ex[:3, :3] = torch.from_numpy(cv2.Rodrigues(anno['objRot'])[0])
            ex[:3, 3] = torch.from_numpy(anno['objTrans'])

            # Move the rest-pose corners into the frame given by the pose.
            corners = torch.from_numpy(anno['objCorners3DRest']).float()
            corners = corners @ ex[:3, :3].T + ex[:3, 3]

            intrinsics.append(camMat)
            extrinsics.append(ex)
            objCorners.append(corners)
            objNames.append(anno['objName'])

        if intrinsics:
            intrinsics = torch.stack(intrinsics).float()
            extrinsics = torch.stack(extrinsics).float()
            objCorners = torch.stack(objCorners)
        else:
            # No valid frame: return empty tensors instead of crashing in
            # torch.stack.  The corner count (8) is an assumption and is
            # irrelevant for an empty tensor.
            intrinsics = torch.zeros((0, 3, 3))
            extrinsics = torch.zeros((0, 4, 4))
            objCorners = torch.zeros((0, 8, 3))
        objNames = np.array(objNames)
        valid = torch.tensor(valid, dtype=torch.bool)

        return intrinsics, extrinsics, objCorners, objNames, valid

    def _load_mask(self, mask_path):
        """Return the boolean object mask resized to ``TARGET_SIZE``."""
        mask = self._imread(mask_path)
        # NOTE(review): cv2.resize defaults to bilinear interpolation, so
        # blended edge pixels fall below 255 and are excluded -- confirm this
        # is intended rather than nearest-neighbour.
        mask = cv2.resize(mask, self.TARGET_SIZE)
        return mask[..., 1] == 255

    def _load_depth(self, depth_path):
        """Read a depth PNG and decode it with :meth:`_decode_depth`."""
        return self._decode_depth(self._imread(depth_path))

    def __getitem__(self, idx):
        """Return image, mask, depth and annotations of frame ``idx`` as a dict."""
        color = self._imread(self.color_paths[idx])
        color = cv2.cvtColor(color, cv2.COLOR_BGR2RGB)
        # HWC uint8 -> CHW float in [0, 1].
        color = (torch.tensor(color).float() / 255.0).permute(2, 0, 1)

        mask = torch.from_numpy(self._load_mask(self.mask_paths[idx]))
        depth = torch.from_numpy(self._load_depth(self.depth_paths[idx]))

        return {
            'color': color,
            'mask': mask,
            'depth': depth,
            'extrinsic': self.extrinsics[idx],
            'intrinsic': self.intrinsics[idx],
            'objCorners': self.objCorners[idx],
            'bbox': self.bboxes[idx],
            # NOTE(review): drops the first two '/'-separated components, which
            # equals the path relative to data_root only when data_root has
            # exactly two components -- verify against how callers use it.
            'color_path': str(self.color_paths[idx]).split('/', 2)[-1],
            'objName': self.objNames[idx],
        }
|
|
|
|
|
class HO3DPair(Dataset):
    """Dataset of frame pairs from one HO3D sequence with bounded rotation gap.

    All unordered frame pairs ``(i, j)`` with ``i < j`` whose relative object
    rotation is below ``max_angle_error`` degrees are enumerated.  For the
    ``'val'`` and ``'test'`` modes a random subset of at most 1500 pairs is
    kept.

    Args:
        data_root: Root directory passed on to :class:`HO3D`.
        mode: Split name passed on to :class:`HO3D`.
        sequence_id: Sequence folder name.
        max_angle_error: Upper bound (exclusive, in degrees) on the relative
            rotation between the two frames of a pair.
    """

    def __init__(self, data_root, mode, sequence_id, max_angle_error):
        self.ho3d_dataset = HO3D(data_root, sequence_id, mode)

        angle_err = self.get_angle_error(self.ho3d_dataset.extrinsics[:, :3, :3])
        self.indices = self._select_pairs(angle_err, max_angle_error)
        # Kept as attributes for callers that read the full pair list.
        self.index0, self.index1 = self.indices[:, 0], self.indices[:, 1]

        if mode in ('val', 'test'):
            # Unseeded random subset; differs between runs.
            self.indices = self.indices[torch.randperm(self.indices.size(0))[:1500]]

    @staticmethod
    def _select_pairs(angle_err, max_angle_error):
        """Return an ``(K, 2)`` int64 tensor of pairs ``i < j`` below the bound."""
        first, second = torch.where(angle_err < max_angle_error)
        # The error matrix is symmetric; keep each unordered pair once and
        # drop the diagonal.
        upper = first < second
        return torch.stack([first[upper], second[upper]], dim=1)

    def get_angle_error(self, R):
        """Return pairwise geodesic rotation angles in degrees.

        Args:
            R: Tensor of shape ``(N, 3, 3)`` holding rotation matrices.

        Returns:
            Tensor of shape ``(N, N)`` whose entry ``[a, b]`` is the rotation
            angle of ``R[a].T @ R[b]``.
        """
        # trace(R_a^T R_b) equals the element-wise product summed over both
        # matrix axes, which avoids materialising an (N, N, 3, 3) tensor.
        trace = torch.einsum('aij,bij->ab', R, R)
        cosine = torch.clip((trace - 1) / 2, -1, 1)
        return torch.acos(cosine).rad2deg()

    def __len__(self):
        """Return the number of frame pairs."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return both frames of pair ``idx`` plus their relative pose."""
        # Plain ints index every container type used by the wrapped dataset.
        idx0, idx1 = self.indices[idx].tolist()
        data0, data1 = self.ho3d_dataset[idx0], self.ho3d_dataset[idx1]

        images = torch.stack([data0['color'], data1['color']], dim=0)

        # Transform that maps frame-0 pose onto frame-1 pose.
        ex0, ex1 = data0['extrinsic'], data1['extrinsic']
        rel_ex = ex1 @ ex0.inverse()

        intrinsics = torch.stack([data0['intrinsic'], data1['intrinsic']], dim=0)
        bboxes = torch.stack([data0['bbox'], data1['bbox']])
        objCorners = torch.stack([data0['objCorners'], data1['objCorners']])

        return {
            'images': images,
            'rotation': rel_ex[:3, :3],
            'translation': rel_ex[:3, 3],
            'intrinsics': intrinsics,
            'bboxes': bboxes,
            'objCorners': objCorners,
            'pair_names': (data0['color_path'], data1['color_path']),
            'objName': data0['objName'],
        }
|
|
|
|
|
class HO3DfromJson(Dataset):
    """Dataset of pre-computed HO3D frame pairs described by a JSON file.

    The JSON file maps string indices (``"0"``, ``"1"``, ...) to records with
    the keys ``pair_names``, ``rotation``, ``translation``, ``intrinsics``,
    ``bboxes``, ``objCorners`` and ``objName``.  Image paths in
    ``pair_names`` are relative to ``data_root``.

    Args:
        data_root: Root directory of the dataset; also expected to contain
            ``models/<object>/points.xyz`` for every name in ``obj_names``.
        json_path: Path of the JSON file describing the pairs.
    """

    # Factor applied to the raw 16-bit depth value decoded from the depth PNG
    # (presumably yields metres -- confirm against the HO3D toolkit).
    DEPTH_SCALE = 0.00012498664727900177
    # (width, height) that masks are resized to.
    TARGET_SIZE = (640, 480)

    def __init__(self, data_root, json_path):
        self.data_root = Path(data_root)
        with open(json_path, 'r', encoding='utf-8') as f:
            self.scene_info = json.load(f)

        self.obj_names = [
            '003_cracker_box',
            '006_mustard_bottle',
            '011_banana',
            '025_mug',
            '037_scissors',
        ]
        self.object_points = {
            obj: np.loadtxt(self.data_root / 'models' / obj / 'points.xyz')
            for obj in self.obj_names
        }

    @staticmethod
    def _imread(path):
        """Read an image with OpenCV, failing loudly if it cannot be read.

        Raises:
            FileNotFoundError: if ``cv2.imread`` returns ``None``.
        """
        image = cv2.imread(str(path))
        if image is None:
            raise FileNotFoundError(f'Could not read image: {path}')
        return image

    @staticmethod
    def _sibling_png(path, folder):
        """Map ``<seq>/rgb/<stem>.<ext>`` to ``<seq>/<folder>/<stem>.png``.

        Only the image's parent directory and suffix are replaced, so an
        ``rgb`` substring elsewhere in the path is left untouched.
        """
        color_path = Path(path)
        return str(color_path.parent.parent / folder / f'{color_path.stem}.png')

    @classmethod
    def _decode_depth(cls, depth_img):
        """Decode a 3-channel 8-bit depth image into a float64 depth map.

        Channel 2 holds the low byte and channel 1 the high byte of a 16-bit
        value, which is then multiplied by ``DEPTH_SCALE``.
        """
        # Widen before multiplying: ``uint8_array * 256`` raises OverflowError
        # on NumPy >= 2 because 256 does not fit into uint8.
        low = depth_img[:, :, 2].astype(np.uint16)
        high = depth_img[:, :, 1].astype(np.uint16)
        return (low + high * 256) * cls.DEPTH_SCALE

    def _load_color(self, path):
        """Read the image at ``path`` and return it in RGB channel order."""
        return cv2.cvtColor(self._imread(path), cv2.COLOR_BGR2RGB)

    def _load_mask(self, path):
        """Return the boolean object mask belonging to the colour image ``path``."""
        mask = self._imread(self._sibling_png(path, 'seg'))
        # NOTE(review): cv2.resize defaults to bilinear interpolation, so
        # blended edge pixels fall below 255 and are excluded -- confirm this
        # is intended rather than nearest-neighbour.
        mask = cv2.resize(mask, self.TARGET_SIZE)
        return mask[..., 1] == 255

    def _load_depth(self, path):
        """Return the decoded depth map belonging to the colour image ``path``."""
        return self._decode_depth(self._imread(self._sibling_png(path, 'depth')))

    def __len__(self):
        """Return the number of pair records in the JSON file."""
        return len(self.scene_info)

    def __getitem__(self, idx):
        """Return images, masks, depths and annotations of pair ``idx``."""
        info = self.scene_info[str(idx)]
        # Exactly the first two entries form the pair.
        frame_paths = [str(self.data_root / name) for name in info['pair_names'][:2]]

        # HWC uint8 -> CHW float in [0, 1], stacked along a new pair axis.
        images = torch.stack(
            [(torch.tensor(self._load_color(p)).float() / 255.0).permute(2, 0, 1)
             for p in frame_paths],
            dim=0,
        )
        masks = torch.stack(
            [torch.from_numpy(self._load_mask(p)) for p in frame_paths], dim=0)
        depths = torch.stack(
            [torch.from_numpy(self._load_depth(p)) for p in frame_paths], dim=0)

        obj_name = info['objName'][0]

        return {
            'images': images,
            'masks': masks,
            'depths': depths,
            'rotation': torch.tensor(info['rotation']).reshape(3, 3),
            'translation': torch.tensor(info['translation']),
            'intrinsics': torch.tensor(info['intrinsics']).reshape(2, 3, 3),
            'bboxes': torch.tensor(info['bboxes']),
            'objCorners': torch.tensor(info['objCorners']),
            'objName': obj_name,
            'point_cloud': self.object_points[obj_name],
        }
|
|
|
|
|
def build_ho3d(mode, config):
    """Build the HO3D dataset for the requested mode.

    Args:
        mode: ``'train'``, ``'val'`` or ``'test'``.
        config: Object whose ``DATASET`` attribute provides ``DATA_ROOT``,
            ``MAX_ANGLE_ERROR`` (train only) and ``JSON_PATH`` (val/test only).

    Returns:
        For ``'train'``: a ``ConcatDataset`` with one ``HO3DPair`` per sequence
        folder under ``<DATA_ROOT>/train`` except the held-out sequences.
        For ``'val'``/``'test'``: a ``HO3DfromJson`` built from ``JSON_PATH``.

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    config = config.DATASET
    data_root = config.DATA_ROOT

    if mode == 'train':
        # Sequences excluded from training.
        val_ids = {'BB14', 'SMu1', 'MC1', 'GSF14', 'SM2', 'SM3', 'SM4', 'SM5',
                   'MC2', 'MC4', 'MC5', 'MC6'}
        # Only directories are sequences; a set filter tolerates held-out
        # sequences that are absent on disk, and sorting makes the order of
        # the concatenated datasets deterministic.
        seq_id_list = sorted(
            x.stem for x in (Path(data_root) / 'train').iterdir()
            if x.is_dir() and x.stem not in val_ids
        )

        datasets = []
        for seq_id in tqdm(seq_id_list, desc=f'Loading HO3D {mode} dataset'):
            datasets.append(HO3DPair(data_root, mode, seq_id, config.MAX_ANGLE_ERROR))
        return ConcatDataset(datasets)

    if mode in ('test', 'val'):
        return HO3DfromJson(config.DATA_ROOT, config.JSON_PATH)

    raise ValueError(f"Unsupported mode {mode!r}; expected 'train', 'val' or 'test'")
|
| |