# NOTE(review): removed non-code extraction artifacts ("Spaces:" / "Runtime error" lines)
| #!/usr/bin/env python3 | |
| # Copyright (C) 2024-present Naver Corporation. All rights reserved. | |
| # Licensed under CC BY-NC-SA 4.0 (non-commercial use only). | |
| # | |
| # -------------------------------------------------------- | |
| # Preprocessing code for the MegaDepth dataset | |
| # dataset at https://www.cs.cornell.edu/projects/megadepth/ | |
| # -------------------------------------------------------- | |
| import collections | |
| import os | |
| import os.path as osp | |
| import numpy as np | |
| from tqdm import tqdm | |
| os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" | |
| import cv2 | |
| import h5py | |
| import path_to_root # noqa | |
| from dust3r.datasets.utils import cropping # noqa | |
| from dust3r.utils.parallel import parallel_threads | |
def get_parser():
    """Return the argparse parser for the MegaDepth preprocessing CLI."""
    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--megadepth_dir", required=True)
    p.add_argument("--precomputed_pairs", required=True)
    p.add_argument("--output_dir", default="data/megadepth_processed")
    return p
def main(db_root, pairs_path, output_dir):
    """Preprocess MegaDepth: rescale every image referenced by the
    precomputed pairs and write images, depthmaps and camera parameters.

    Args:
        db_root: root directory of the raw MegaDepth dataset.
        pairs_path: .npz file holding 'scenes', 'images' and 'pairs' arrays.
        output_dir: destination directory for the processed data.
    """
    os.makedirs(output_dir, exist_ok=True)

    # load the precomputed pairs
    data = np.load(pairs_path, allow_pickle=True)
    scenes, images, pairs = data["scenes"], data["images"], data["pairs"]

    # gather the set of unique image indices needed per scene
    todo = collections.defaultdict(set)
    for scene, im1, im2, _score in pairs:
        todo[scene].update((im1, im2))

    # for each scene: load camera parameters, then rescale images in parallel
    for scene_idx, im_idxs in tqdm(todo.items(), desc="Overall"):
        scene, subscene = scenes[scene_idx].split()
        out_dir = osp.join(output_dir, scene, subscene)
        os.makedirs(out_dir, exist_ok=True)

        _, pose_w2cam, intrinsics = _load_kpts_and_poses(
            db_root, scene, subscene, intrinsics=True
        )

        in_dir = osp.join(db_root, scene, "dense" + subscene)
        tasks = [
            (in_dir, img, intrinsics[img], pose_w2cam[img], out_dir)
            for img in (images[im_id] for im_id in im_idxs)
        ]
        parallel_threads(
            resize_one_image,
            tasks,
            star_args=True,
            front_num=0,
            leave=False,
            desc=f"{scene}/{subscene}",
        )

    print("Done! prepared all pairs in", output_dir)
def resize_one_image(root, tag, K_pre_rectif, pose_w2cam, out_dir):
    """Rescale one image + depthmap and save them with rectified cameras.

    Args:
        root: scene directory containing 'imgs' and 'depths' subfolders.
        tag: image filename (with extension) inside root/imgs.
        K_pre_rectif: tuple ((width, height), K, distortion) of the raw camera.
        pose_w2cam: 4x4 world-to-camera pose of the raw image.
        out_dir: directory where the .jpg / .exr / .npz outputs are written.
    """
    if osp.isfile(osp.join(out_dir, tag + ".npz")):
        return  # already processed, skip

    # load image (OpenCV reads BGR, convert to RGB)
    img = cv2.cvtColor(
        cv2.imread(osp.join(root, "imgs", tag), cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB
    )

    # load the matching depthmap
    with h5py.File(osp.join(root, "depths", osp.splitext(tag)[0] + ".h5"), "r") as hd5:
        depthmap = np.asarray(hd5["depth"])

    # rectify = undistort the intrinsics
    imsize_pre, K_pre, distortion = K_pre_rectif
    imsize_post = img.shape[1::-1]  # (width, height)
    K_post = cv2.getOptimalNewCameraMatrix(
        K_pre,
        distortion,
        imsize_pre,
        alpha=0,
        newImgSize=imsize_post,
        centerPrincipalPoint=True,
    )[0]

    # downscale image + depthmap to the target resolution
    img_out, depthmap_out, intrinsics_out, R_in2out = _downscale_image(
        K_post, img, depthmap, resolution_out=(800, 600)
    )

    # write everything
    # img_out presumably is a PIL image (has .save) returned by the cropping
    # helper -- TODO confirm against cropping.rescale_image_depthmap
    img_out.save(osp.join(out_dir, tag + ".jpg"), quality=90)
    cv2.imwrite(osp.join(out_dir, tag + ".exr"), depthmap_out)
    camout2world = np.linalg.inv(pose_w2cam)
    camout2world[:3, :3] = camout2world[:3, :3] @ R_in2out.T
    np.savez(
        osp.join(out_dir, tag + ".npz"),
        intrinsics=intrinsics_out,
        cam2world=camout2world,
    )
def _downscale_image(camera_intrinsics, image, depthmap, resolution_out=(512, 384)):
    """Rescale image/depthmap via the cropping helper, orienting the target
    resolution to match the input (portrait vs landscape), and return the
    updated intrinsics plus the (identity) in-to-out rotation."""
    H, W = image.shape[:2]
    # portrait input -> ascending (short side first); landscape -> descending
    target = sorted(resolution_out) if W < H else sorted(resolution_out, reverse=True)
    image, depthmap, intrinsics_out = cropping.rescale_image_depthmap(
        image, depthmap, camera_intrinsics, target, force=False
    )
    # a pure rescale does not rotate the camera
    return image, depthmap, intrinsics_out, np.eye(3)
def _load_kpts_and_poses(root, scene_id, subscene, z_only=False, intrinsics=False):
    """Parse COLMAP 'manhattan' text files for one (scene, subscene).

    Args:
        root: MegaDepth root directory.
        scene_id: scene folder name.
        subscene: subscene identifier.
        z_only: if True, each pose is only the camera principal axis
            (3-vector) instead of the full 4x4 world-to-camera matrix.
        intrinsics: if True, also parse cameras.txt and return per-image
            intrinsics.

    Returns:
        (points3D_idxs, poses) or, if intrinsics is True,
        (points3D_idxs, poses, image_intrinsics).
    """
    if intrinsics:
        with open(
            os.path.join(
                root, scene_id, "sparse", "manhattan", subscene, "cameras.txt"
            ),
            "r",
        ) as f:
            raw = f.readlines()[3:]  # skip the header
        camera_intrinsics = {}
        for camera in raw:
            camera = camera.split(" ")
            # fields after CAMERA_ID MODEL: width height focal cx cy k0
            width, height, focal, cx, cy, k0 = [float(elem) for elem in camera[2:]]
            K = np.eye(3)
            K[0, 0] = focal
            K[1, 1] = focal
            K[0, 2] = cx
            K[1, 2] = cy
            camera_intrinsics[int(camera[0])] = (
                (int(width), int(height)),
                K,
                (k0, 0, 0, 0),
            )

    with open(
        os.path.join(root, scene_id, "sparse", "manhattan", subscene, "images.txt"), "r"
    ) as f:
        raw = f.read().splitlines()[4:]  # skip the header

    extract_pose = (
        colmap_raw_pose_to_principal_axis if z_only else colmap_raw_pose_to_RT
    )

    poses = {}
    points3D_idxs = {}
    camera = []
    # images.txt alternates one pose line and one 2D-points line per image
    for image, points in zip(raw[::2], raw[1::2]):
        image = image.split(" ")
        points = points.split(" ")
        image_id = image[-1]
        camera.append(int(image[-2]))
        # pose fields: QW QX QY QZ TX TY TZ
        raw_pose = [float(elem) for elem in image[1:-2]]
        poses[image_id] = extract_pose(raw_pose)
        current_points3D_idxs = {int(i) for i in points[2::3] if i != "-1"}
        # BUGFIX: the assert message used to call an undefined `bb()`
        # (debugger leftover), which would have raised NameError instead of
        # AssertionError if the assertion ever failed.
        assert -1 not in current_points3D_idxs, (
            f"unexpected -1 point index for image {image_id}"
        )
        points3D_idxs[image_id] = current_points3D_idxs

    if intrinsics:
        image_intrinsics = {
            im_id: camera_intrinsics[cam] for im_id, cam in zip(poses, camera)
        }
        return points3D_idxs, poses, image_intrinsics
    else:
        return points3D_idxs, poses
def colmap_raw_pose_to_principal_axis(image_pose):
    """Return the camera principal (z) axis from a raw COLMAP pose line.

    image_pose starts with the quaternion (QW, QX, QY, QZ); the principal
    axis is the third row of the corresponding rotation matrix.
    """
    quat = np.asarray(image_pose[:4], dtype=float)
    w, x, y, z = quat / np.linalg.norm(quat)
    return np.float32(
        [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)]
    )
def colmap_raw_pose_to_RT(image_pose):
    """Convert a raw COLMAP pose line into a 4x4 world-to-camera matrix.

    image_pose is (QW, QX, QY, QZ, TX, TY, TZ, ...); the quaternion is
    normalized before being turned into a rotation matrix.
    """
    quat = np.asarray(image_pose[:4], dtype=float)
    w, x, y, z = quat / np.linalg.norm(quat)
    rot = np.array(
        [
            [1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * z * w, 2 * x * z + 2 * y * w],
            [2 * x * y + 2 * z * w, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * x * w],
            [2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x * x - 2 * y * y],
        ]
    )
    # World-to-Camera pose
    pose = np.eye(4)
    pose[:3, :3] = rot
    pose[:3, 3] = image_pose[4:7]
    return pose
if __name__ == "__main__":
    # parse CLI arguments and run the full preprocessing
    cli_args = get_parser().parse_args()
    main(cli_args.megadepth_dir, cli_args.precomputed_pairs, cli_args.output_dir)