from pathlib import Path
import torch
import torch.utils.data as data
import cv2
import numpy as np
from transforms3d.quaternions import qinverse, qmult, rotate_vector, quat2mat
from utils.transform import correct_intrinsic_scale
from utils import Augmentor


class MapFreeScene(data.Dataset):
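    """
    Dataset for a single scene: each item is a pair of frames with their images,
    depth maps, intrinsics and the relative pose from frame 0 to frame 1.
    """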
    def __init__(self, scene_root, resize, sample_factor=1, overlap_limits=None, estimated_depth=None, mode='train'):
        super().__init__()
        self.scene_root = Path(scene_root)
        self.resize = resize
        self.sample_factor = sample_factor
        self.estimated_depth = estimated_depth
        # load absolute poses
        self.poses = self.read_poses(self.scene_root)
        # read intrinsics
        self.K = self.read_intrinsics(self.scene_root, resize)
        # load pairs
        self.pairs = self.load_pairs(self.scene_root, overlap_limits, self.sample_factor)
        self.augment = Augmentor(mode == 'train')

    @staticmethod
    def read_intrinsics(scene_root: Path, resize=None):
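        """
        Reads per-image pinhole intrinsics from '<scene_root>/intrinsics.txt', where each
        non-comment line is: img_path fx fy cx cy W H. If resize=(w, h) is given, the
        intrinsics are rescaled from the original (W, H) resolution to (w, h).
        Returns a dict: img_path -> 3x3 np.float32 intrinsic matrix K.
        """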
        Ks = {}
        with (scene_root / 'intrinsics.txt').open('r') as f:
            for line in f.readlines():
                if '#' in line:
                    continue
                line = line.strip().split(' ')
                img_name = line[0]
                fx, fy, cx, cy, W, H = map(float, line[1:])
                K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
                if resize is not None:
                    K = correct_intrinsic_scale(K, resize[0] / W, resize[1] / H)
                Ks[img_name] = K
        return Ks

    @staticmethod
    def read_poses(scene_root: Path):
        """
        Returns a dictionary that maps: img_path -> (q, t), where
        np.array q = (qw, qx, qy, qz) is a quaternion encoding the rotation;
        np.array t = (tx, ty, tz) is a translation vector;
        (q, t) encodes the absolute pose (world-to-camera), i.e. X_c = R(q) X_W + t.
        """
        poses = {}
        with (scene_root / 'poses.txt').open('r') as f:
            for line in f.readlines():
                if '#' in line:
                    continue
                line = line.strip().split(' ')
                img_name = line[0]
                qt = np.array(list(map(float, line[1:])))
                poses[img_name] = (qt[:4], qt[4:])
        return poses

    def load_pairs(self, scene_root: Path, overlap_limits: tuple = None, sample_factor: int = 1):
        """
        For training scenes, pairs of frames are filtered by overlap (pre-computed in overlaps.npz).
        For test/val scenes, pairs are formed between the keyframe and every sample_factor-th query frame.
        If sample_factor == 1, all query frames are used. Note: sample_factor applies only to test/val.
        Returns:
            pairs: np.ndarray [Npairs, 4], where the columns are seqA, imA, seqB, imB, respectively.
        """
        overlaps_path = scene_root / 'overlaps.npz'
        if overlaps_path.exists():
            f = np.load(overlaps_path, allow_pickle=True)
            idxs, overlaps = f['idxs'], f['overlaps']
            if overlap_limits is not None:
                min_overlap, max_overlap = overlap_limits
                mask = (overlaps > min_overlap) & (overlaps < max_overlap)
                idxs = idxs[mask]
            return idxs.copy()
        else:
            idxs = np.zeros((len(self.poses) - 1, 4), dtype=np.uint16)
            idxs[:, 2] = 1
            idxs[:, 3] = np.array([int(fn[-9:-4])
                                   for fn in self.poses.keys() if 'seq0' not in fn], dtype=np.uint16)
            return idxs[::sample_factor]

    def get_pair_path(self, pair):
        seqA, imgA, seqB, imgB = pair
        return (f'seq{seqA}/frame_{imgA:05}.jpg', f'seq{seqB}/frame_{imgB:05}.jpg')

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, index):
        # image paths (relative to scene_root)
        img_name0, img_name1 = self.get_pair_path(self.pairs[index])
        w_new, h_new = self.resize

        # load, augment and normalise both images (BGR -> RGB, [0, 1], CHW)
        image0 = cv2.imread(str(self.scene_root / img_name0))
        # image0 = cv2.resize(image0, (w_new, h_new))
        image0 = cv2.cvtColor(image0, cv2.COLOR_BGR2RGB)
        image0 = self.augment(image0)
        image0 = torch.from_numpy(image0).permute(2, 0, 1).float() / 255.
        image1 = cv2.imread(str(self.scene_root / img_name1))
        # image1 = cv2.resize(image1, (w_new, h_new))
        image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)
        image1 = self.augment(image1)
        image1 = torch.from_numpy(image1).permute(2, 0, 1).float() / 255.
        images = torch.stack([image0, image1], dim=0)

        # load per-frame depth maps stored next to each image as '<frame>.da.npy'
        depth0 = np.load(str(self.scene_root / img_name0).replace('.jpg', '.da.npy'))
        depth0 = torch.from_numpy(depth0).float()
        depth1 = np.load(str(self.scene_root / img_name1).replace('.jpg', '.da.npy'))
        depth1 = torch.from_numpy(depth1).float()
        depths = torch.stack([depth0, depth1], dim=0)

        # get absolute poses of image 0 and image 1:
        # quaternion and translation vector that transform World-to-Cam
        q1, t1 = self.poses[img_name0]
        q2, t2 = self.poses[img_name1]

        # get 4 x 4 relative pose transformation matrix (from image 0 to image 1)
        # for the test/val set, (q1, t1) is the identity pose, so the relative pose matches the absolute pose
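        # With world-to-camera poses X_c_i = R_i X_w + t_i, the relative transform is
        # T_12 = T_2 @ T_1^{-1}, i.e. R_12 = R_2 R_1^T and t_12 = t_2 - R_12 t_1,
        # which is what the quaternion algebra below computes.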
        q12 = qmult(q2, qinverse(q1))
        t12 = t2 - rotate_vector(t1, q12)
        T = np.eye(4, dtype=np.float32)
        T[:3, :3] = quat2mat(q12)
        T[:3, -1] = t12
        T = torch.from_numpy(T)

        K_0 = torch.from_numpy(self.K[img_name0].copy()).reshape(3, 3)
        K_1 = torch.from_numpy(self.K[img_name1].copy()).reshape(3, 3)
        intrinsics = torch.stack([K_0, K_1], dim=0).float()
        data = {
            'images': images,
            'depths': depths,
            'rotation': T[:3, :3],
            'translation': T[:3, 3],
            'intrinsics': intrinsics,
            'scene_id': self.scene_root.stem,
            'scene_root': str(self.scene_root),
            'pair_id': index * self.sample_factor,
            'pair_names': (img_name0, img_name1),
        }
        return data


def build_concat_mapfree(mode, config):
    assert mode in ['train', 'val', 'test'], 'Invalid dataset mode'

    data_root = Path(config.DATASET.DATA_ROOT) / mode
    scenes = [s.name for s in data_root.iterdir() if s.is_dir()]
    sample_factor = {'train': 1, 'val': 5, 'test': 1}[mode]
    estimated_depth = config.DATASET.ESTIMATED_DEPTH
    resize = (540, 720)
    overlap_limits = (0.2, 0.7)

    # Init dataset objects for each scene
    datasets = [MapFreeScene(data_root / scene, resize, sample_factor, overlap_limits, estimated_depth, mode)
                for scene in scenes]
    return data.ConcatDataset(datasets)
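

if __name__ == '__main__':
    # Usage sketch: build the concatenated val dataset and wrap it in a DataLoader.
    # The config values below are placeholders/assumptions; in the real pipeline they
    # come from the project's config (config.DATASET.DATA_ROOT, config.DATASET.ESTIMATED_DEPTH).
    from types import SimpleNamespace

    config = SimpleNamespace(DATASET=SimpleNamespace(
        DATA_ROOT='path/to/mapfree_data',  # placeholder path: expects <DATA_ROOT>/<mode>/<scene>/
        ESTIMATED_DEPTH=None))             # stored by MapFreeScene; depth files are read from '*.da.npy'

    dataset = build_concat_mapfree('val', config)
    loader = data.DataLoader(dataset, batch_size=4, shuffle=False, num_workers=2)
    batch = next(iter(loader))
    # Expected shapes: images [B, 2, 3, H, W], depths [B, 2, H, W],
    # rotation [B, 3, 3], translation [B, 3], intrinsics [B, 2, 3, 3]
    print(batch['pair_names'])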