# NOTE(review): removed non-code extraction artifacts ("Spaces:" / "Runtime error" lines)
| #!/usr/bin/env python3 | |
| # Copyright (C) 2024-present Naver Corporation. All rights reserved. | |
| # Licensed under CC BY-NC-SA 4.0 (non-commercial use only). | |
| # | |
| # -------------------------------------------------------- | |
| # Preprocessing code for the MegaDepth dataset | |
| # dataset at https://www.cs.cornell.edu/projects/megadepth/ | |
| # -------------------------------------------------------- | |
| import collections | |
| import os | |
| import os.path as osp | |
| import numpy as np | |
| from tqdm import tqdm | |
| os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" | |
| import cv2 | |
| import h5py | |
| import path_to_root # noqa | |
| from dust3r.datasets.utils import cropping # noqa | |
| from dust3r.utils.parallel import parallel_threads | |
def get_parser():
    """Return the argparse parser for the MegaDepth preprocessing CLI."""
    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--megadepth_dir", required=True)
    p.add_argument("--precomputed_pairs", required=True)
    p.add_argument("--output_dir", default="data/megadepth_processed")
    return p
def main(db_root, pairs_path, output_dir):
    """Preprocess MegaDepth: rescale every image referenced by the
    precomputed pairs and write images, depthmaps and camera parameters.

    Args:
        db_root: root directory of the raw MegaDepth dataset.
        pairs_path: .npz file holding 'scenes', 'images' and 'pairs' arrays.
        output_dir: destination directory for the processed data.
    """
    os.makedirs(output_dir, exist_ok=True)

    # load the precomputed pairs
    data = np.load(pairs_path, allow_pickle=True)
    scenes, images, pairs = data["scenes"], data["images"], data["pairs"]

    # gather the set of unique image indices needed per scene
    todo = collections.defaultdict(set)
    for scene, im1, im2, _score in pairs:
        todo[scene].update((im1, im2))

    # for each scene: load camera parameters, then rescale images in parallel
    for scene_idx, im_idxs in tqdm(todo.items(), desc="Overall"):
        scene, subscene = scenes[scene_idx].split()
        out_dir = osp.join(output_dir, scene, subscene)
        os.makedirs(out_dir, exist_ok=True)

        _, pose_w2cam, intrinsics = _load_kpts_and_poses(
            db_root, scene, subscene, intrinsics=True
        )

        in_dir = osp.join(db_root, scene, "dense" + subscene)
        tasks = [
            (in_dir, img, intrinsics[img], pose_w2cam[img], out_dir)
            for img in (images[im_id] for im_id in im_idxs)
        ]
        parallel_threads(
            resize_one_image,
            tasks,
            star_args=True,
            front_num=0,
            leave=False,
            desc=f"{scene}/{subscene}",
        )

    print("Done! prepared all pairs in", output_dir)
def resize_one_image(root, tag, K_pre_rectif, pose_w2cam, out_dir):
    """Rescale one image + depthmap and save them with rectified cameras.

    Args:
        root: scene directory containing 'imgs' and 'depths' subfolders.
        tag: image filename (with extension) inside root/imgs.
        K_pre_rectif: tuple ((width, height), K, distortion) of the raw camera.
        pose_w2cam: 4x4 world-to-camera pose of the raw image.
        out_dir: directory where the .jpg / .exr / .npz outputs are written.
    """
    if osp.isfile(osp.join(out_dir, tag + ".npz")):
        return  # already processed, skip

    # load image (OpenCV reads BGR, convert to RGB)
    img = cv2.cvtColor(
        cv2.imread(osp.join(root, "imgs", tag), cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB
    )

    # load the matching depthmap
    with h5py.File(osp.join(root, "depths", osp.splitext(tag)[0] + ".h5"), "r") as hd5:
        depthmap = np.asarray(hd5["depth"])

    # rectify = undistort the intrinsics
    imsize_pre, K_pre, distortion = K_pre_rectif
    imsize_post = img.shape[1::-1]  # (width, height)
    K_post = cv2.getOptimalNewCameraMatrix(
        K_pre,
        distortion,
        imsize_pre,
        alpha=0,
        newImgSize=imsize_post,
        centerPrincipalPoint=True,
    )[0]

    # downscale image + depthmap to the target resolution
    img_out, depthmap_out, intrinsics_out, R_in2out = _downscale_image(
        K_post, img, depthmap, resolution_out=(800, 600)
    )

    # write everything
    # img_out presumably is a PIL image (has .save) returned by the cropping
    # helper -- TODO confirm against cropping.rescale_image_depthmap
    img_out.save(osp.join(out_dir, tag + ".jpg"), quality=90)
    cv2.imwrite(osp.join(out_dir, tag + ".exr"), depthmap_out)
    camout2world = np.linalg.inv(pose_w2cam)
    camout2world[:3, :3] = camout2world[:3, :3] @ R_in2out.T
    np.savez(
        osp.join(out_dir, tag + ".npz"),
        intrinsics=intrinsics_out,
        cam2world=camout2world,
    )
def _downscale_image(camera_intrinsics, image, depthmap, resolution_out=(512, 384)):
    """Rescale image/depthmap via the cropping helper, orienting the target
    resolution to match the input (portrait vs landscape), and return the
    updated intrinsics plus the (identity) in-to-out rotation."""
    H, W = image.shape[:2]
    # portrait input -> ascending (short side first); landscape -> descending
    target = sorted(resolution_out) if W < H else sorted(resolution_out, reverse=True)
    image, depthmap, intrinsics_out = cropping.rescale_image_depthmap(
        image, depthmap, camera_intrinsics, target, force=False
    )
    # a pure rescale does not rotate the camera
    return image, depthmap, intrinsics_out, np.eye(3)
def _load_kpts_and_poses(root, scene_id, subscene, z_only=False, intrinsics=False):
    """Parse COLMAP 'manhattan' text files for one (scene, subscene).

    Args:
        root: MegaDepth root directory.
        scene_id: scene folder name.
        subscene: subscene identifier.
        z_only: if True, each pose is only the camera principal axis
            (3-vector) instead of the full 4x4 world-to-camera matrix.
        intrinsics: if True, also parse cameras.txt and return per-image
            intrinsics.

    Returns:
        (points3D_idxs, poses) or, if intrinsics is True,
        (points3D_idxs, poses, image_intrinsics).
    """
    if intrinsics:
        with open(
            os.path.join(
                root, scene_id, "sparse", "manhattan", subscene, "cameras.txt"
            ),
            "r",
        ) as f:
            raw = f.readlines()[3:]  # skip the header
        camera_intrinsics = {}
        for camera in raw:
            camera = camera.split(" ")
            # fields after CAMERA_ID MODEL: width height focal cx cy k0
            width, height, focal, cx, cy, k0 = [float(elem) for elem in camera[2:]]
            K = np.eye(3)
            K[0, 0] = focal
            K[1, 1] = focal
            K[0, 2] = cx
            K[1, 2] = cy
            camera_intrinsics[int(camera[0])] = (
                (int(width), int(height)),
                K,
                (k0, 0, 0, 0),
            )

    with open(
        os.path.join(root, scene_id, "sparse", "manhattan", subscene, "images.txt"), "r"
    ) as f:
        raw = f.read().splitlines()[4:]  # skip the header

    extract_pose = (
        colmap_raw_pose_to_principal_axis if z_only else colmap_raw_pose_to_RT
    )

    poses = {}
    points3D_idxs = {}
    camera = []
    # images.txt alternates one pose line and one 2D-points line per image
    for image, points in zip(raw[::2], raw[1::2]):
        image = image.split(" ")
        points = points.split(" ")
        image_id = image[-1]
        camera.append(int(image[-2]))
        # pose fields: QW QX QY QZ TX TY TZ
        raw_pose = [float(elem) for elem in image[1:-2]]
        poses[image_id] = extract_pose(raw_pose)
        current_points3D_idxs = {int(i) for i in points[2::3] if i != "-1"}
        # BUGFIX: the assert message used to call an undefined `bb()`
        # (debugger leftover), which would have raised NameError instead of
        # AssertionError if the assertion ever failed.
        assert -1 not in current_points3D_idxs, (
            f"unexpected -1 point index for image {image_id}"
        )
        points3D_idxs[image_id] = current_points3D_idxs

    if intrinsics:
        image_intrinsics = {
            im_id: camera_intrinsics[cam] for im_id, cam in zip(poses, camera)
        }
        return points3D_idxs, poses, image_intrinsics
    else:
        return points3D_idxs, poses
def colmap_raw_pose_to_principal_axis(image_pose):
    """Return the camera principal (z) axis from a raw COLMAP pose line.

    image_pose starts with the quaternion (QW, QX, QY, QZ); the principal
    axis is the third row of the corresponding rotation matrix.
    """
    quat = np.asarray(image_pose[:4], dtype=float)
    w, x, y, z = quat / np.linalg.norm(quat)
    return np.float32(
        [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)]
    )
def colmap_raw_pose_to_RT(image_pose):
    """Convert a raw COLMAP pose line into a 4x4 world-to-camera matrix.

    image_pose is (QW, QX, QY, QZ, TX, TY, TZ, ...); the quaternion is
    normalized before being turned into a rotation matrix.
    """
    quat = np.asarray(image_pose[:4], dtype=float)
    w, x, y, z = quat / np.linalg.norm(quat)
    rot = np.array(
        [
            [1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * z * w, 2 * x * z + 2 * y * w],
            [2 * x * y + 2 * z * w, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * x * w],
            [2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x * x - 2 * y * y],
        ]
    )
    # World-to-Camera pose
    pose = np.eye(4)
    pose[:3, :3] = rot
    pose[:3, 3] = image_pose[4:7]
    return pose
if __name__ == "__main__":
    # parse CLI arguments and run the full preprocessing
    cli_args = get_parser().parse_args()
    main(cli_args.megadepth_dir, cli_args.precomputed_pairs, cli_args.output_dir)