| |
| |
| |
| |
| |
| |
| |
| |
| |
| import os |
| import json |
| import os.path as osp |
| import decimal |
| import argparse |
| import math |
| from bisect import bisect_left |
| from PIL import Image |
| import numpy as np |
| import quaternion |
| from scipy import interpolate |
| import cv2 |
|
|
|
|
| def get_parser(): |
| parser = argparse.ArgumentParser() |
| parser.add_argument('--arkitscenes_dir', required=True) |
| parser.add_argument('--precomputed_pairs', required=True) |
| parser.add_argument('--output_dir', default='data/arkitscenes_processed') |
| return parser |
|
|
|
|
| def value_to_decimal(value, decimal_places): |
| decimal.getcontext().rounding = decimal.ROUND_HALF_UP |
| return decimal.Decimal(str(float(value))).quantize(decimal.Decimal('1e-{}'.format(decimal_places))) |
|
|
|
|
| def closest(value, sorted_list): |
| index = bisect_left(sorted_list, value) |
| if index == 0: |
| return sorted_list[0] |
| elif index == len(sorted_list): |
| return sorted_list[-1] |
| else: |
| value_before = sorted_list[index - 1] |
| value_after = sorted_list[index] |
| if value_after - value < value - value_before: |
| return value_after |
| else: |
| return value_before |
|
|
|
|
| def get_up_vectors(pose_device_to_world): |
| return np.matmul(pose_device_to_world, np.array([[0.0], [-1.0], [0.0], [0.0]])) |
|
|
|
|
| def get_right_vectors(pose_device_to_world): |
| return np.matmul(pose_device_to_world, np.array([[1.0], [0.0], [0.0], [0.0]])) |
|
|
|
|
| def read_traj(traj_path): |
| quaternions = [] |
| poses = [] |
| timestamps = [] |
| poses_p_to_w = [] |
| with open(traj_path) as f: |
| traj_lines = f.readlines() |
| for line in traj_lines: |
| tokens = line.split() |
| assert len(tokens) == 7 |
| traj_timestamp = float(tokens[0]) |
|
|
| timestamps_decimal_value = value_to_decimal(traj_timestamp, 3) |
| timestamps.append(float(timestamps_decimal_value)) |
|
|
| angle_axis = [float(tokens[1]), float(tokens[2]), float(tokens[3])] |
| r_w_to_p, _ = cv2.Rodrigues(np.asarray(angle_axis)) |
| t_w_to_p = np.asarray([float(tokens[4]), float(tokens[5]), float(tokens[6])]) |
|
|
| pose_w_to_p = np.eye(4) |
| pose_w_to_p[:3, :3] = r_w_to_p |
| pose_w_to_p[:3, 3] = t_w_to_p |
|
|
| pose_p_to_w = np.linalg.inv(pose_w_to_p) |
|
|
| r_p_to_w_as_quat = quaternion.from_rotation_matrix(pose_p_to_w[:3, :3]) |
| t_p_to_w = pose_p_to_w[:3, 3] |
| poses_p_to_w.append(pose_p_to_w) |
| poses.append(t_p_to_w) |
| quaternions.append(r_p_to_w_as_quat) |
| return timestamps, poses, quaternions, poses_p_to_w |
|
|
|
|
| def main(rootdir, pairsdir, outdir): |
| os.makedirs(outdir, exist_ok=True) |
|
|
| subdirs = ['Test', 'Training'] |
| for subdir in subdirs: |
| if not osp.isdir(osp.join(rootdir, subdir)): |
| continue |
| |
| outsubdir = osp.join(outdir, subdir) |
| os.makedirs(outsubdir, exist_ok=True) |
| listfile = osp.join(pairsdir, subdir, 'scene_list.json') |
| with open(listfile, 'r') as f: |
| scene_dirs = json.load(f) |
|
|
| valid_scenes = [] |
| for scene_subdir in scene_dirs: |
| out_scene_subdir = osp.join(outsubdir, scene_subdir) |
| os.makedirs(out_scene_subdir, exist_ok=True) |
|
|
| scene_dir = osp.join(rootdir, subdir, scene_subdir) |
| depth_dir = osp.join(scene_dir, 'lowres_depth') |
| rgb_dir = osp.join(scene_dir, 'vga_wide') |
| intrinsics_dir = osp.join(scene_dir, 'vga_wide_intrinsics') |
| traj_path = osp.join(scene_dir, 'lowres_wide.traj') |
|
|
| |
| selected_pairs_path = osp.join(pairsdir, subdir, scene_subdir, 'selected_pairs.npz') |
| selected_npz = np.load(selected_pairs_path) |
| selection, pairs = selected_npz['selection'], selected_npz['pairs'] |
| selected_sky_direction_scene = str(selected_npz['sky_direction_scene'][0]) |
| if len(selection) == 0 or len(pairs) == 0: |
| |
| continue |
| valid_scenes.append(scene_subdir) |
|
|
| |
| scene_metadata_path = osp.join(out_scene_subdir, 'scene_metadata.npz') |
| if osp.isfile(scene_metadata_path): |
| continue |
| else: |
| print(f'parsing {scene_subdir}') |
| |
| timestamps, poses, quaternions, poses_cam_to_world = read_traj(traj_path) |
|
|
| poses = np.array(poses) |
| quaternions = np.array(quaternions, dtype=np.quaternion) |
| quaternions = quaternion.unflip_rotors(quaternions) |
| timestamps = np.array(timestamps) |
|
|
| selected_images = [(basename, basename.split(".png")[0].split("_")[1]) for basename in selection] |
| timestamps_selected = [float(frame_id) for _, frame_id in selected_images] |
|
|
| sky_direction_scene, trajectories, intrinsics, images = convert_scene_metadata(scene_subdir, |
| intrinsics_dir, |
| timestamps, |
| quaternions, |
| poses, |
| poses_cam_to_world, |
| selected_images, |
| timestamps_selected) |
| assert selected_sky_direction_scene == sky_direction_scene |
|
|
| os.makedirs(os.path.join(out_scene_subdir, 'vga_wide'), exist_ok=True) |
| os.makedirs(os.path.join(out_scene_subdir, 'lowres_depth'), exist_ok=True) |
| assert isinstance(sky_direction_scene, str) |
| for basename in images: |
| img_out = os.path.join(out_scene_subdir, 'vga_wide', basename.replace('.png', '.jpg')) |
| depth_out = os.path.join(out_scene_subdir, 'lowres_depth', basename) |
| if osp.isfile(img_out) and osp.isfile(depth_out): |
| continue |
|
|
| vga_wide_path = osp.join(rgb_dir, basename) |
| depth_path = osp.join(depth_dir, basename) |
|
|
| img = Image.open(vga_wide_path) |
| depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED) |
|
|
| |
| if sky_direction_scene == 'RIGHT': |
| try: |
| img = img.transpose(Image.Transpose.ROTATE_90) |
| except Exception: |
| img = img.transpose(Image.ROTATE_90) |
| depth = cv2.rotate(depth, cv2.ROTATE_90_COUNTERCLOCKWISE) |
| elif sky_direction_scene == 'LEFT': |
| try: |
| img = img.transpose(Image.Transpose.ROTATE_270) |
| except Exception: |
| img = img.transpose(Image.ROTATE_270) |
| depth = cv2.rotate(depth, cv2.ROTATE_90_CLOCKWISE) |
| elif sky_direction_scene == 'DOWN': |
| try: |
| img = img.transpose(Image.Transpose.ROTATE_180) |
| except Exception: |
| img = img.transpose(Image.ROTATE_180) |
| depth = cv2.rotate(depth, cv2.ROTATE_180) |
|
|
| W, H = img.size |
| if not osp.isfile(img_out): |
| img.save(img_out) |
|
|
| depth = cv2.resize(depth, (W, H), interpolation=cv2.INTER_NEAREST_EXACT) |
| if not osp.isfile(depth_out): |
| cv2.imwrite(depth_out, depth) |
|
|
| |
| np.savez(scene_metadata_path, |
| trajectories=trajectories, |
| intrinsics=intrinsics, |
| images=images, |
| pairs=pairs) |
|
|
| outlistfile = osp.join(outsubdir, 'scene_list.json') |
| with open(outlistfile, 'w') as f: |
| json.dump(valid_scenes, f) |
|
|
| |
| scene_data = {} |
| for scene_subdir in valid_scenes: |
| scene_metadata_path = osp.join(outsubdir, scene_subdir, 'scene_metadata.npz') |
| with np.load(scene_metadata_path) as data: |
| trajectories = data['trajectories'] |
| intrinsics = data['intrinsics'] |
| images = data['images'] |
| pairs = data['pairs'] |
| scene_data[scene_subdir] = {'trajectories': trajectories, |
| 'intrinsics': intrinsics, |
| 'images': images, |
| 'pairs': pairs} |
| offset = 0 |
| counts = [] |
| scenes = [] |
| sceneids = [] |
| images = [] |
| intrinsics = [] |
| trajectories = [] |
| pairs = [] |
| for scene_idx, (scene_subdir, data) in enumerate(scene_data.items()): |
| num_imgs = data['images'].shape[0] |
| img_pairs = data['pairs'] |
|
|
| scenes.append(scene_subdir) |
| sceneids.extend([scene_idx] * num_imgs) |
|
|
| images.append(data['images']) |
|
|
| K = np.expand_dims(np.eye(3), 0).repeat(num_imgs, 0) |
| K[:, 0, 0] = [fx for _, _, fx, _, _, _ in data['intrinsics']] |
| K[:, 1, 1] = [fy for _, _, _, fy, _, _ in data['intrinsics']] |
| K[:, 0, 2] = [hw for _, _, _, _, hw, _ in data['intrinsics']] |
| K[:, 1, 2] = [hh for _, _, _, _, _, hh in data['intrinsics']] |
|
|
| intrinsics.append(K) |
| trajectories.append(data['trajectories']) |
|
|
| |
| img_pairs[:, 0:2] += offset |
| pairs.append(img_pairs) |
| counts.append(offset) |
|
|
| offset += num_imgs |
|
|
| images = np.concatenate(images, axis=0) |
| intrinsics = np.concatenate(intrinsics, axis=0) |
| trajectories = np.concatenate(trajectories, axis=0) |
| pairs = np.concatenate(pairs, axis=0) |
| np.savez(osp.join(outsubdir, 'all_metadata.npz'), |
| counts=counts, |
| scenes=scenes, |
| sceneids=sceneids, |
| images=images, |
| intrinsics=intrinsics, |
| trajectories=trajectories, |
| pairs=pairs) |
|
|
|
|
| def convert_scene_metadata(scene_subdir, intrinsics_dir, |
| timestamps, quaternions, poses, poses_cam_to_world, |
| selected_images, timestamps_selected): |
| |
| sky_direction_scene, rotated_to_cam = find_scene_orientation(poses_cam_to_world) |
|
|
| |
| |
| timestamps_selected = np.array(timestamps_selected) |
| spline = interpolate.interp1d(timestamps, poses, kind='linear', axis=0) |
| interpolated_rotations = quaternion.squad(quaternions, timestamps, timestamps_selected) |
| interpolated_positions = spline(timestamps_selected) |
|
|
| trajectories = [] |
| intrinsics = [] |
| images = [] |
| for i, (basename, frame_id) in enumerate(selected_images): |
| intrinsic_fn = osp.join(intrinsics_dir, f"{scene_subdir}_{frame_id}.pincam") |
| if not osp.exists(intrinsic_fn): |
| intrinsic_fn = osp.join(intrinsics_dir, f"{scene_subdir}_{float(frame_id) - 0.001:.3f}.pincam") |
| if not osp.exists(intrinsic_fn): |
| intrinsic_fn = osp.join(intrinsics_dir, f"{scene_subdir}_{float(frame_id) + 0.001:.3f}.pincam") |
| assert osp.exists(intrinsic_fn) |
| w, h, fx, fy, hw, hh = np.loadtxt(intrinsic_fn) |
|
|
| pose = np.eye(4) |
| pose[:3, :3] = quaternion.as_rotation_matrix(interpolated_rotations[i]) |
| pose[:3, 3] = interpolated_positions[i] |
|
|
| images.append(basename) |
| if sky_direction_scene == 'RIGHT' or sky_direction_scene == 'LEFT': |
| intrinsics.append([h, w, fy, fx, hh, hw]) |
| else: |
| intrinsics.append([w, h, fx, fy, hw, hh]) |
| trajectories.append(pose @ rotated_to_cam) |
|
|
| return sky_direction_scene, trajectories, intrinsics, images |
|
|
|
|
| def find_scene_orientation(poses_cam_to_world): |
| if len(poses_cam_to_world) > 0: |
| up_vector = sum(get_up_vectors(p) for p in poses_cam_to_world) / len(poses_cam_to_world) |
| right_vector = sum(get_right_vectors(p) for p in poses_cam_to_world) / len(poses_cam_to_world) |
| up_world = np.array([[0.0], [0.0], [1.0], [0.0]]) |
| else: |
| up_vector = np.array([[0.0], [-1.0], [0.0], [0.0]]) |
| right_vector = np.array([[1.0], [0.0], [0.0], [0.0]]) |
| up_world = np.array([[0.0], [0.0], [1.0], [0.0]]) |
|
|
| |
| device_up_to_world_up_angle = np.arccos(np.clip(np.dot(np.transpose(up_world), |
| up_vector), -1.0, 1.0)).item() * 180.0 / np.pi |
| device_right_to_world_up_angle = np.arccos(np.clip(np.dot(np.transpose(up_world), |
| right_vector), -1.0, 1.0)).item() * 180.0 / np.pi |
|
|
| up_closest_to_90 = abs(device_up_to_world_up_angle - 90.0) < abs(device_right_to_world_up_angle - 90.0) |
| if up_closest_to_90: |
| assert abs(device_up_to_world_up_angle - 90.0) < 45.0 |
| |
| if device_right_to_world_up_angle > 90.0: |
| sky_direction_scene = 'LEFT' |
| cam_to_rotated_q = quaternion.from_rotation_vector([0.0, 0.0, math.pi / 2.0]) |
| else: |
| |
| |
| |
| sky_direction_scene = 'RIGHT' |
| cam_to_rotated_q = quaternion.from_rotation_vector([0.0, 0.0, -math.pi / 2.0]) |
| else: |
| |
| assert abs(device_right_to_world_up_angle - 90.0) < 45.0 |
| if device_up_to_world_up_angle > 90.0: |
| sky_direction_scene = 'DOWN' |
| cam_to_rotated_q = quaternion.from_rotation_vector([0.0, 0.0, math.pi]) |
| else: |
| sky_direction_scene = 'UP' |
| cam_to_rotated_q = quaternion.quaternion(1, 0, 0, 0) |
| cam_to_rotated = np.eye(4) |
| cam_to_rotated[:3, :3] = quaternion.as_rotation_matrix(cam_to_rotated_q) |
| rotated_to_cam = np.linalg.inv(cam_to_rotated) |
| return sky_direction_scene, rotated_to_cam |
|
|
|
|
| if __name__ == '__main__': |
| parser = get_parser() |
| args = parser.parse_args() |
| main(args.arkitscenes_dir, args.precomputed_pairs, args.output_dir) |
|
|