|
|
import os |
|
|
import json |
|
|
import os.path as osp |
|
|
import decimal |
|
|
import argparse |
|
|
import math |
|
|
from bisect import bisect_left |
|
|
from PIL import Image |
|
|
import numpy as np |
|
|
import quaternion |
|
|
from scipy import interpolate |
|
|
import cv2 |
|
|
from tqdm import tqdm |
|
|
from multiprocessing import Pool |
|
|
|
|
|
|
|
|
def get_parser():
    """Build the CLI argument parser for this preprocessing script.

    Returns:
        argparse.ArgumentParser with --arkitscenes_dir and --output_dir.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess ARKitScenes high-resolution depth scenes."
    )
    parser.add_argument(
        "--arkitscenes_dir",
        default="",
        help="Root of the raw ARKitScenes download "
        "(must contain Training/ and Validation/ subdirectories).",
    )
    parser.add_argument(
        "--output_dir",
        default="data/dust3r_data/processed_arkitscenes_highres",
        help="Directory where processed scenes and scene_list.json are written.",
    )
    return parser
|
|
|
|
|
|
|
|
def value_to_decimal(value, decimal_places):
    """Round *value* to *decimal_places* digits using half-up rounding.

    Args:
        value: anything convertible to float (e.g. a timestamp string).
        decimal_places: number of fractional digits to keep.

    Returns:
        decimal.Decimal quantized to ``1e-decimal_places``.
    """
    # Use a local context so the rounding mode does not leak into the global
    # decimal context of the whole process (the original mutated
    # decimal.getcontext() on every call).
    with decimal.localcontext() as ctx:
        ctx.rounding = decimal.ROUND_HALF_UP
        return decimal.Decimal(str(float(value))).quantize(
            decimal.Decimal("1e-{}".format(decimal_places))
        )
|
|
|
|
|
|
|
|
def closest(value, sorted_list):
    """Return the element of *sorted_list* nearest to *value*.

    *sorted_list* must be sorted ascending and non-empty. On a tie the
    smaller (earlier) element wins.
    """
    idx = bisect_left(sorted_list, value)
    # Out-of-range queries clamp to the nearest end of the list.
    if idx == 0:
        return sorted_list[0]
    if idx == len(sorted_list):
        return sorted_list[-1]
    before, after = sorted_list[idx - 1], sorted_list[idx]
    # Strict '<' means ties go to the earlier element, matching the
    # original behavior.
    return after if after - value < value - before else before
|
|
|
|
|
|
|
|
def get_up_vectors(pose_device_to_world):
    """Map the device-frame up direction (0, -1, 0) into world coordinates.

    Returns a 4x1 homogeneous direction vector (w component 0).
    """
    device_up = np.array([[0.0], [-1.0], [0.0], [0.0]])
    return pose_device_to_world @ device_up
|
|
|
|
|
|
|
|
def get_right_vectors(pose_device_to_world):
    """Map the device-frame right direction (1, 0, 0) into world coordinates.

    Returns a 4x1 homogeneous direction vector (w component 0).
    """
    device_right = np.array([[1.0], [0.0], [0.0], [0.0]])
    return pose_device_to_world @ device_right
|
|
|
|
|
|
|
|
def read_traj(traj_path):
    """Parse an ARKit ``.traj`` trajectory file.

    Each line holds 7 tokens: timestamp, a world-to-device axis-angle
    rotation (3 values), and a world-to-device translation (3 values).
    Every pose is inverted to device-to-world before being returned.

    Returns:
        (timestamps, positions, quaternions, poses_p_to_w) where
        timestamps are floats rounded to milliseconds, positions are
        3-vectors, quaternions are device-to-world rotations, and
        poses_p_to_w are 4x4 device-to-world matrices.
    """
    timestamps = []
    poses = []
    quaternions = []
    poses_p_to_w = []
    with open(traj_path) as f:
        for line in f:
            tokens = line.split()
            assert len(tokens) == 7

            # Timestamps are quantized to 3 decimals (milliseconds) so they
            # can be matched against frame filenames later.
            timestamps.append(float(value_to_decimal(float(tokens[0]), 3)))

            rvec = np.asarray([float(v) for v in tokens[1:4]])
            tvec = np.asarray([float(v) for v in tokens[4:7]])

            world_to_device = np.eye(4)
            world_to_device[:3, :3] = cv2.Rodrigues(rvec)[0]
            world_to_device[:3, 3] = tvec

            device_to_world = np.linalg.inv(world_to_device)

            poses_p_to_w.append(device_to_world)
            poses.append(device_to_world[:3, 3])
            quaternions.append(
                quaternion.from_rotation_matrix(device_to_world[:3, :3])
            )
    return timestamps, poses, quaternions, poses_p_to_w
|
|
|
|
|
|
|
|
def main(rootdir, outdir):
    """Process all scenes under the Validation and Training splits.

    Args:
        rootdir: raw ARKitScenes root (contains Validation/ and Training/).
        outdir: destination root for processed scenes.

    Side effects: writes processed frames per scene and one
    ``scene_list.json`` per split listing the valid scene names.
    """
    os.makedirs(outdir, exist_ok=True)
    for subdir in ("Validation", "Training"):
        outsubdir = osp.join(outdir, subdir)
        indir = osp.join(rootdir, subdir)
        scene_dirs = sorted(
            d for d in os.listdir(indir) if osp.isdir(osp.join(indir, d))
        )

        # Scenes are independent, so fan them out over all available cores.
        tasks = [(rootdir, outdir, subdir, scene_subdir) for scene_subdir in scene_dirs]
        with Pool() as pool:
            results = list(
                tqdm(pool.imap(process_scene, tasks), total=len(scene_dirs))
            )

        valid_scenes = [result for result in results if result is not None]

        # BUG FIX: outsubdir may not exist yet (e.g. when no scene produced
        # any output), which made the open() below raise FileNotFoundError.
        os.makedirs(outsubdir, exist_ok=True)
        outlistfile = osp.join(outsubdir, "scene_list.json")
        with open(outlistfile, "w") as f:
            json.dump(valid_scenes, f)
|
|
|
|
|
|
|
|
def _rotate_upright(img, depth, sky_direction_scene):
    """Rotate a PIL RGB image and its cv2 depth map so the sky faces up.

    *sky_direction_scene* is one of "UP", "DOWN", "LEFT", "RIGHT" as
    returned by find_scene_orientation(). "UP" is a no-op.
    """
    if sky_direction_scene == "RIGHT":
        try:
            img = img.transpose(Image.Transpose.ROTATE_90)
        except Exception:
            # Pillow < 9.1 exposes the constant directly on Image.
            img = img.transpose(Image.ROTATE_90)
        depth = cv2.rotate(depth, cv2.ROTATE_90_COUNTERCLOCKWISE)
    elif sky_direction_scene == "LEFT":
        try:
            img = img.transpose(Image.Transpose.ROTATE_270)
        except Exception:
            img = img.transpose(Image.ROTATE_270)
        depth = cv2.rotate(depth, cv2.ROTATE_90_CLOCKWISE)
    elif sky_direction_scene == "DOWN":
        try:
            img = img.transpose(Image.Transpose.ROTATE_180)
        except Exception:
            img = img.transpose(Image.ROTATE_180)
        depth = cv2.rotate(depth, cv2.ROTATE_180)
    return img, depth


def process_scene(args):
    """Convert one raw ARKitScenes scene into the processed layout.

    Args:
        args: tuple (rootdir, outdir, subdir, scene_subdir) — packed as a
            single tuple so it can be dispatched through Pool.imap.

    Returns:
        scene_subdir (str) when the scene is valid (processed now or
        previously), or None when required inputs are missing or no frame
        could be converted.
    """
    rootdir, outdir, subdir, scene_subdir = args

    scene_dir = osp.join(rootdir, subdir, scene_subdir)
    outsubdir = osp.join(outdir, subdir)
    out_scene_subdir = osp.join(outsubdir, scene_subdir)

    # All four raw assets are required; incomplete scenes are skipped.
    required = ("highres_depth", "vga_wide", "vga_wide_intrinsics", "lowres_wide.traj")
    if not all(osp.exists(osp.join(scene_dir, name)) for name in required):
        return None

    depth_dir = osp.join(scene_dir, "highres_depth")
    rgb_dir = osp.join(scene_dir, "vga_wide")
    intrinsics_dir = osp.join(scene_dir, "vga_wide_intrinsics")
    traj_path = osp.join(scene_dir, "lowres_wide.traj")

    depth_files = sorted(os.listdir(depth_dir))
    img_files = sorted(os.listdir(rgb_dir))

    scene_metadata_path = osp.join(out_scene_subdir, "scene_metadata.npz")
    if osp.isfile(scene_metadata_path):
        # Already converted in a previous run.
        # BUG FIX: still report the scene as valid so it appears in
        # scene_list.json (the original fell through and returned None).
        print(f"Skipping {scene_subdir}")
        return scene_subdir

    print(f"parsing {scene_subdir}")

    timestamps, poses, quaternions, poses_cam_to_world = read_traj(traj_path)

    poses = np.array(poses)
    quaternions = np.array(quaternions, dtype=np.quaternion)
    # Remove sign flips so quaternion interpolation stays on one hemisphere.
    quaternions = quaternion.unflip_rotors(quaternions)
    timestamps = np.array(timestamps)

    # Depth frames sorted by timestamp; filenames are <scene>_<timestamp>.png.
    all_depths = sorted(
        [
            (basename, basename.split(".png")[0].split("_")[1])
            for basename in depth_files
        ],
        key=lambda x: float(x[1]),
    )

    # Keep only frames inside the trajectory's time span — poses outside it
    # cannot be interpolated.
    selected_depths = []
    timestamps_selected = []
    timestamp_min = timestamps.min()
    timestamp_max = timestamps.max()
    for basename, frame_id in all_depths:
        frame_id = float(frame_id)
        if frame_id < timestamp_min or frame_id > timestamp_max:
            continue
        selected_depths.append((basename, frame_id))
        timestamps_selected.append(frame_id)

    sky_direction_scene, trajectories, intrinsics, images, depths = (
        convert_scene_metadata(
            scene_subdir,
            intrinsics_dir,
            timestamps,
            quaternions,
            poses,
            poses_cam_to_world,
            img_files,
            selected_depths,
            timestamps_selected,
        )
    )

    if len(images) == 0:
        print(f"Skipping {scene_subdir}")
        return None

    os.makedirs(out_scene_subdir, exist_ok=True)
    os.makedirs(osp.join(out_scene_subdir, "vga_wide"), exist_ok=True)
    os.makedirs(osp.join(out_scene_subdir, "highres_depth"), exist_ok=True)
    assert isinstance(sky_direction_scene, str)

    for image_path, depth_basename in zip(images, depths):
        img_out = osp.join(
            out_scene_subdir, "vga_wide", image_path.replace(".png", ".jpg")
        )
        depth_out = osp.join(out_scene_subdir, "highres_depth", depth_basename)
        if osp.isfile(img_out) and osp.isfile(depth_out):
            continue

        vga_wide_path = osp.join(rgb_dir, image_path)
        depth_in_path = osp.join(depth_dir, depth_basename)
        if not osp.isfile(vga_wide_path) or not osp.isfile(depth_in_path):
            continue

        img = Image.open(vga_wide_path)
        depth = cv2.imread(depth_in_path, cv2.IMREAD_UNCHANGED)

        img, depth = _rotate_upright(img, depth, sky_direction_scene)

        W, H = img.size
        if not osp.isfile(img_out):
            img.save(img_out)

        # Nearest-neighbour keeps raw depth values intact (no blending
        # across depth discontinuities).
        depth = cv2.resize(depth, (W, H), interpolation=cv2.INTER_NEAREST)
        if not osp.isfile(depth_out):
            cv2.imwrite(depth_out, depth)

    np.savez(
        scene_metadata_path,
        trajectories=trajectories,
        intrinsics=intrinsics,
        images=images,
    )
    # BUG FIX: the original returned None here, so main() always wrote an
    # empty scene_list.json. Report this scene as successfully processed.
    return scene_subdir
|
|
|
|
|
|
|
|
def convert_scene_metadata(
    scene_subdir,
    intrinsics_dir,
    timestamps,
    quaternions,
    poses,
    poses_cam_to_world,
    all_images,
    selected_depths,
    timestamps_selected,
):
    """Interpolate poses and collect per-frame metadata for one scene.

    Args:
        scene_subdir: scene name (filename prefix of every frame).
        intrinsics_dir: directory of per-timestamp ``.pincam`` files.
        timestamps / quaternions / poses / poses_cam_to_world: trajectory
            samples as returned by read_traj (quaternions unflipped).
        all_images: list of available RGB filenames in vga_wide/.
        selected_depths: list of (depth_basename, frame_timestamp) pairs.
        timestamps_selected: the frame timestamps to interpolate poses at.

    Returns:
        (sky_direction_scene, trajectories, intrinsics, images, depths)
        with one entry per frame for which intrinsics and an RGB image
        were found.
    """
    sky_direction_scene, rotated_to_cam = find_scene_orientation(poses_cam_to_world)

    # Interpolate the low-rate trajectory at each selected frame timestamp:
    # positions linearly, rotations with spherical quadrangle (squad)
    # interpolation.
    timestamps_selected = np.array(timestamps_selected)
    spline = interpolate.interp1d(timestamps, poses, kind="linear", axis=0)
    interpolated_rotations = quaternion.squad(
        quaternions, timestamps, timestamps_selected
    )
    interpolated_positions = spline(timestamps_selected)

    trajectories = []
    intrinsics = []
    images = []
    depths = []
    for i, (basename, frame_id) in enumerate(selected_depths):
        # Intrinsics are stored per (low-res) timestamp; search +/- 100 ms
        # around the frame timestamp for the nearest .pincam file.
        intrinsic_fn = osp.join(intrinsics_dir, f"{scene_subdir}_{frame_id}.pincam")
        search_interval = int(0.1 / 0.001)
        for timestamp in range(-search_interval, search_interval + 1):
            if osp.exists(intrinsic_fn):
                break
            intrinsic_fn = osp.join(
                intrinsics_dir,
                f"{scene_subdir}_{float(frame_id) + timestamp * 0.001:.3f}.pincam",
            )
        if not osp.exists(intrinsic_fn):
            print(f"Skipping {intrinsic_fn}")
            continue

        # Find the RGB frame matching this depth frame, searching +/- 1 ms.
        image_path = "{}_{}.png".format(scene_subdir, frame_id)
        search_interval = int(0.001 / 0.001)
        for timestamp in range(-search_interval, search_interval + 1):
            if image_path in all_images:
                break
            # BUG FIX: format adjusted timestamps with :.3f — raw float
            # repr produces noise like 6845.805000000001 that can never
            # match a filename (the .pincam search above already did this).
            image_path = "{}_{:.3f}.png".format(
                scene_subdir, float(frame_id) + timestamp * 0.001
            )
        if image_path not in all_images:
            print(f"Skipping {scene_subdir} {frame_id}")
            continue

        w, h, fx, fy, hw, hh = np.loadtxt(intrinsic_fn)

        pose = np.eye(4)
        pose[:3, :3] = quaternion.as_rotation_matrix(interpolated_rotations[i])
        pose[:3, 3] = interpolated_positions[i]

        # BUG FIX: record the RGB filename actually found by the search
        # above. The original appended the depth basename here, discarding
        # any timestamp adjustment and breaking the RGB lookup downstream.
        images.append(image_path)
        depths.append(basename)
        if sky_direction_scene == "RIGHT" or sky_direction_scene == "LEFT":
            # Frames will be rotated 90 degrees to upright, so swap axes.
            intrinsics.append([h, w, fy, fx, hh, hw])
        else:
            intrinsics.append([w, h, fx, fy, hw, hh])
        # Compose with the camera rotation matching the image rotation.
        trajectories.append(pose @ rotated_to_cam)

    return sky_direction_scene, trajectories, intrinsics, images, depths
|
|
|
|
|
|
|
|
def find_scene_orientation(poses_cam_to_world):
    """Classify which way the sky points in the captured frames.

    Averages the device up and right directions over all poses, compares
    their angles to the world up axis, and picks the closest of UP, DOWN,
    LEFT, RIGHT.

    Returns:
        (sky_direction_scene, rotated_to_cam) where rotated_to_cam is the
        4x4 inverse of the in-plane rotation that brings frames upright.
    """
    if len(poses_cam_to_world) > 0:
        n_poses = len(poses_cam_to_world)
        up_vector = sum(get_up_vectors(p) for p in poses_cam_to_world) / n_poses
        right_vector = sum(get_right_vectors(p) for p in poses_cam_to_world) / n_poses
    else:
        # No poses: fall back to the canonical device directions.
        up_vector = np.array([[0.0], [-1.0], [0.0], [0.0]])
        right_vector = np.array([[1.0], [0.0], [0.0], [0.0]])
    up_world = np.array([[0.0], [0.0], [1.0], [0.0]])

    def angle_to_world_up(direction):
        # Angle (degrees) between a 4x1 direction and the world up axis.
        cos_angle = np.clip(
            np.dot(np.transpose(up_world), direction), -1.0, 1.0
        ).item()
        return math.degrees(math.acos(cos_angle))

    device_up_to_world_up_angle = angle_to_world_up(up_vector)
    device_right_to_world_up_angle = angle_to_world_up(right_vector)

    up_closest_to_90 = abs(device_up_to_world_up_angle - 90.0) < abs(
        device_right_to_world_up_angle - 90.0
    )
    if up_closest_to_90:
        # Device up axis is roughly horizontal -> landscape capture.
        assert abs(device_up_to_world_up_angle - 90.0) < 45.0
        if device_right_to_world_up_angle > 90.0:
            sky_direction_scene = "LEFT"
            cam_to_rotated_q = quaternion.from_rotation_vector(
                [0.0, 0.0, math.pi / 2.0]
            )
        else:
            sky_direction_scene = "RIGHT"
            cam_to_rotated_q = quaternion.from_rotation_vector(
                [0.0, 0.0, -math.pi / 2.0]
            )
    else:
        # Device right axis is roughly horizontal -> portrait capture.
        assert abs(device_right_to_world_up_angle - 90.0) < 45.0
        if device_up_to_world_up_angle > 90.0:
            sky_direction_scene = "DOWN"
            cam_to_rotated_q = quaternion.from_rotation_vector([0.0, 0.0, math.pi])
        else:
            sky_direction_scene = "UP"
            cam_to_rotated_q = quaternion.quaternion(1, 0, 0, 0)

    cam_to_rotated = np.eye(4)
    cam_to_rotated[:3, :3] = quaternion.as_rotation_matrix(cam_to_rotated_q)
    return sky_direction_scene, np.linalg.inv(cam_to_rotated)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Parse CLI flags and run the full preprocessing pipeline.
    cli_args = get_parser().parse_args()
    main(cli_args.arkitscenes_dir, cli_args.output_dir)
|
|
|