| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import argparse |
| | import os |
| | from pathlib import Path |
| |
|
| | import numpy as np |
| | import json |
| | import copy |
| | from pyquaternion import Quaternion |
| | from tqdm import tqdm |
| | from PIL import Image |
| |
|
def rotate_img(img_path, degree=90):
    """Rotate the image at ``img_path`` in place by ``degree`` degrees CCW.

    ``expand=1`` grows the canvas so the rotated image is not cropped; the
    file is re-saved over itself at maximum JPEG quality with no chroma
    subsampling to minimize recompression loss.

    Fix: use ``Image.open`` as a context manager so the underlying file
    handle is always closed (the original leaked it).
    """
    with Image.open(img_path) as img:
        img.rotate(degree, expand=1).save(img_path, quality=100, subsampling=0)
| | |
def rotate_camera(c2w, degree=90):
    """Return ``c2w`` post-multiplied by a rotation of ``degree`` degrees
    about the -z axis (compensates for the image rotation done on disk)."""
    angle = np.deg2rad(degree)
    spin = Quaternion(axis=[0, 0, -1], angle=angle).transformation_matrix
    return c2w @ spin
| |
|
def swap_axes(c2w):
    """Pre-multiply ``c2w`` by a +90° rotation about the x axis,
    converting between the capture's and NeRF's up-axis conventions."""
    quarter_turn = Quaternion(axis=[1, 0, 0], angle=np.pi / 2)
    return quarter_turn.transformation_matrix @ c2w
| |
|
| | |
def find_transforms_center_and_scale(raw_transforms):
    """Estimate the scene's "center of attention" and a normalizing scale.

    The center is the least-squares point closest to every camera's viewing
    ray (origin = camera position, direction = camera z axis).  The scale
    maps the average camera distance from that center to 4.0.

    Parameters
    ----------
    raw_transforms : dict
        NeRF-style dict with a ``"frames"`` list whose entries carry a 4x4
        ``"transform_matrix"`` (nested lists or arrays).

    Returns
    -------
    (translation, scale) : (np.ndarray of shape (3,), float)

    Fix: the original converted each frame's ``transform_matrix`` to an
    ndarray *in the caller's dict* and deep-copied the whole structure just
    to average camera distances; this version is side-effect-free and
    vectorized (the per-frame tqdm loop was trivially fast anyway).
    """
    print("computing center of attention...")
    frames = raw_transforms['frames']

    # Stack the top 3x4 of every pose without mutating the caller's data.
    mats = np.asarray([np.asarray(f['transform_matrix'], dtype=float)[:3, :4]
                       for f in frames])
    rays_o = mats[:, :3, 3:]    # (N, 3, 1) camera positions
    rays_d = mats[:, :3, 2:3]   # (N, 3, 1) camera z (view) axes

    def min_line_dist(rays_o, rays_d):
        # Point minimizing summed squared distance to all lines o + t*d:
        # each line contributes projector A_i = I - d d^T; solve the
        # normal equations of the stacked system.
        A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0, 2, 1])
        b_i = -A_i @ rays_o
        pt_mindist = np.squeeze(
            -np.linalg.inv((np.transpose(A_i, [0, 2, 1]) @ A_i).mean(0))
            @ b_i.mean(0))
        return pt_mindist

    translation = min_line_dist(rays_o, rays_d)

    # Average camera distance from the recovered center.
    centered = mats[:, :3, 3] - translation
    avglen = float(np.linalg.norm(centered, axis=1).mean())
    print("avg camera distance from origin", avglen)
    scale = 4.0 / avglen  # normalize so the average distance becomes 4.0
    return translation, scale
| |
|
def normalize_transforms(transforms, translation, scale):
    """Return a deep copy of ``transforms`` whose camera positions are
    shifted by ``-translation`` and then multiplied by ``scale``.

    The input dict is left untouched; matrices come back as nested lists
    so the result is JSON-serializable.
    """
    result = copy.deepcopy(transforms)
    for frame in result["frames"]:
        matrix = np.asarray(frame["transform_matrix"])
        matrix[0:3, 3] -= translation
        matrix[0:3, 3] *= scale
        frame["transform_matrix"] = matrix.tolist()
    return result
| |
|
def parse_args():
    """Parse command-line options for the Record3D → NeRF converter.

    Options: ``--scene`` (capture path), ``--rotate`` (flag), and
    ``--subsample`` (int frame-step, default 1).
    """
    parser = argparse.ArgumentParser(
        description="convert a Record3D capture to nerf format transforms.json")
    parser.add_argument("--scene", default="", help="path to the Record3D capture")
    parser.add_argument("--rotate", action="store_true", help="rotate the dataset")
    parser.add_argument("--subsample", default=1, type=int,
                        help="step size of subsampling")
    return parser.parse_args()
| |
|
if __name__ == "__main__":
    args = parse_args()
    dataset_dir = Path(args.scene)
    # Record3D writes a JSON 'metadata' file (no extension) at the capture
    # root containing intrinsics K, image size h/w, and per-frame poses.
    with open(dataset_dir / 'metadata') as f:
        metadata = json.load(f)

    frames = []
    # Assumes images are named 0.jpg .. N-1.jpg with no gaps — TODO confirm
    # against the capture layout; a missing index would break the loop below.
    n_images = len(list((dataset_dir / 'rgbd').glob('*.jpg')))
    poses = np.array(metadata['poses'])
    for idx in tqdm(range(n_images)):
        img_name = f'{idx}.jpg'
        img_path = dataset_dir / 'rgbd' / img_name

        # Optionally rotate each image file in place by 90° (destructive:
        # overwrites the capture's JPEGs on disk).
        if args.rotate:
            rotate_img(img_path)

        """ Each `pose` is a 7-element tuple which contains quaternion + world position.
        [qx, qy, qz, qw, tx, ty, tz]
        """
        pose = poses[idx]
        # Build the 4x4 camera-to-world matrix from quaternion + position.
        q = Quaternion(x=pose[0], y=pose[1], z=pose[2], w=pose[3])
        c2w = np.eye(4)
        c2w[:3, :3] = q.rotation_matrix
        c2w[:3, -1] = [pose[4], pose[5], pose[6]]
        # If the images were rotated, rotate the cameras to match; then
        # swap axes into the NeRF coordinate convention.
        if args.rotate:
            c2w = rotate_camera(c2w)
        c2w = swap_axes(c2w)

        frames.append(
            {
                "file_path": f"./rgbd/{img_name}",
                "transform_matrix": c2w.tolist(),
            }
        )

    # Intrinsics. NOTE(review): K is reshaped then transposed, which implies
    # the metadata stores it column-major — confirm against Record3D output.
    if not args.rotate:
        h = metadata['h']
        w = metadata['w']
        K = np.array(metadata['K']).reshape([3, 3]).T
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]
    else:
        # A 90° image rotation swaps width/height, swaps fx/fy, and moves
        # the principal point; cy is mirrored across the new height.
        h = metadata['w']
        w = metadata['h']
        K = np.array(metadata['K']).reshape([3, 3]).T
        fx = K[1, 1]
        fy = K[0, 0]
        cx = K[1, 2]
        cy = h - K[0, 2]

    # Assemble the NeRF/instant-ngp style transforms dict.
    transforms = {}
    transforms['fl_x'] = fx
    transforms['fl_y'] = fy
    transforms['cx'] = cx
    transforms['cy'] = cy
    transforms['w'] = w
    transforms['h'] = h
    transforms['aabb_scale'] = 16
    transforms['scale'] = 1.0
    # Field of view derived from focal length via the pinhole model.
    transforms['camera_angle_x'] = 2 * np.arctan(transforms['w'] / (2 * transforms['fl_x']))
    transforms['camera_angle_y'] = 2 * np.arctan(transforms['h'] / (2 * transforms['fl_y']))
    transforms['frames'] = frames

    # Save the raw (un-normalized, ARKit-frame) transforms first.
    os.makedirs(dataset_dir / 'arkit_transforms', exist_ok=True)
    with open(dataset_dir / 'arkit_transforms' / 'transforms.json', 'w') as fp:
        json.dump(transforms, fp, indent=2)

    # Subsample frames, then recenter/rescale the cameras and write the
    # final transforms.json at the capture root.
    transforms['frames'] = transforms['frames'][::args.subsample]
    translation, scale = find_transforms_center_and_scale(transforms)
    normalized_transforms = normalize_transforms(transforms, translation, scale)

    output_path = dataset_dir / 'transforms.json'
    with open(output_path, "w") as outfile:
        json.dump(normalized_transforms, outfile, indent=2)