"""
Preprocess the Dynamic Replica dataset.

This script reads frame annotations (stored in compressed JSON files), loads
images, depth maps, optical flow, and camera parameters, and saves processed
images, depth maps, flow files, and camera metadata (intrinsics and poses) to
an output directory organized by split, sequence, and camera view.

Usage:
    python preprocess_dynamic_replica.py --root_dir /path/to/data_dynamic_replica \
        --out_dir /path/to/processed_dynamic_replica \
        [--splits train valid test] \
        [--num_processes 8]
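
Output layout (one subdirectory per data type):
    out_dir/<split>/<sequence>/<left|right>/{rgb,depth,flow_forward,flow_backward,cam}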
""" |
|
import argparse
import gzip
import os
import os.path as osp
import re
import shutil
from collections import defaultdict
from dataclasses import dataclass
from multiprocessing import Pool, cpu_count
from typing import List, Optional

# Must be set before cv2 is imported; OpenCV only checks this flag at import
# time, so setting it afterwards leaves EXR I/O disabled.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

import cv2
import numpy as np
import torch
from PIL import Image
from pytorch3d.implicitron.dataset.types import (
    FrameAnnotation as ImplicitronFrameAnnotation,
    load_dataclass,
)
from tqdm import tqdm


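# Magic number at the head of a Middlebury .flo file (the bytes "PIEH" read as float32).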
TAG_CHAR = np.array([202021.25], np.float32)


def readFlow(fn):
    """Read a .flo optical flow file in Middlebury format."""
    with open(fn, "rb") as f:
        magic = np.fromfile(f, np.float32, count=1)
        if magic[0] != TAG_CHAR[0]:
            print("Magic number incorrect. Invalid .flo file")
            return None
        w = int(np.fromfile(f, np.int32, count=1)[0])
        h = int(np.fromfile(f, np.int32, count=1)[0])
        # Flow is stored row-major as interleaved (u, v) pairs.
        data = np.fromfile(f, np.float32, count=2 * w * h)
        return np.resize(data, (h, w, 2))


def readPFM(file):
    """Read a .pfm file and return its contents as a numpy array."""
    with open(file, "rb") as f:
        header = f.readline().rstrip()
        if header == b"PF":
            color = True
        elif header == b"Pf":
            color = False
        else:
            raise Exception("Not a PFM file.")

        dim_match = re.match(rb"^(\d+)\s(\d+)\s$", f.readline())
        if dim_match:
            width, height = map(int, dim_match.groups())
        else:
            raise Exception("Malformed PFM header.")

        # A negative scale denotes little-endian data.
        scale = float(f.readline().rstrip())
        endian = "<" if scale < 0 else ">"
        if scale < 0:
            scale = -scale

        data = np.fromfile(f, endian + "f")
        shape = (height, width, 3) if color else (height, width)
        data = np.reshape(data, shape)
        # PFM stores scanlines bottom-to-top; flip to top-to-bottom.
        data = np.flipud(data)
        return data


def read_gen(file_name, pil=False):
    ext = osp.splitext(file_name)[-1].lower()
    if ext in [".png", ".jpeg", ".ppm", ".jpg"]:
        return Image.open(file_name)
    elif ext in [".bin", ".raw"]:
        return np.load(file_name)
    elif ext == ".flo":
        return readFlow(file_name).astype(np.float32)
    elif ext == ".pfm":
        flow = readPFM(file_name).astype(np.float32)
        return flow if len(flow.shape) == 2 else flow[:, :, :-1]
    return []


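# Depth maps are 16-bit PNGs whose pixels are the raw bit patterns of float16
# values (the CO3D/Implicitron convention), so the buffer is reinterpreted
# rather than cast.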
def _load_16big_png_depth(depth_png):
    with Image.open(depth_png) as depth_pil:
        depth = (
            np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16)
            .astype(np.float32)
            .reshape((depth_pil.size[1], depth_pil.size[0]))
        )
    return depth


@dataclass
class DynamicReplicaFrameAnnotation(ImplicitronFrameAnnotation):
    """A dataclass used to load annotations from .json for Dynamic Replica."""

    camera_name: Optional[str] = None
    instance_id_map_path: Optional[str] = None
    # Despite the str annotations, the flow entries arrive as {"path": ...}
    # dicts at runtime (load_dataclass passes them through unchanged), which is
    # why they are indexed with ["path"] below.
    flow_forward: Optional[str] = None
    flow_forward_mask: Optional[str] = None
    flow_backward: Optional[str] = None
    flow_backward_mask: Optional[str] = None
    trajectories: Optional[str] = None


def _get_pytorch3d_camera(entry_viewpoint, image_size):
    """
    Convert the PyTorch3D NDC camera stored in an annotation to pixel-space
    intrinsics and an OpenCV-convention world-to-camera extrinsic.

    Returns:
        R, tvec, focal_length_px, principal_point_px
    """
    assert entry_viewpoint is not None
    principal_point = torch.tensor(entry_viewpoint.principal_point, dtype=torch.float)
    focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float)
    half_image_size_wh_orig = (
        torch.tensor(list(reversed(image_size)), dtype=torch.float) / 2.0
    )

    fmt = entry_viewpoint.intrinsics_format
    if fmt.lower() == "ndc_norm_image_bounds":
        rescale = half_image_size_wh_orig
    elif fmt.lower() == "ndc_isotropic":
        rescale = half_image_size_wh_orig.min()
    else:
        raise ValueError(f"Unknown intrinsics format: {fmt}")

    principal_point_px = half_image_size_wh_orig - principal_point * rescale
    focal_length_px = focal_length * rescale

    # Convert the row-major PyTorch3D extrinsics to OpenCV convention by
    # flipping the x/y axes and transposing (mirrors
    # pytorch3d.utils.opencv_from_cameras_projection).
    R = torch.tensor(entry_viewpoint.R, dtype=torch.float)
    T = torch.tensor(entry_viewpoint.T, dtype=torch.float)
    R_pytorch3d = R.clone()
    T_pytorch3d = T.clone()
    T_pytorch3d[..., :2] *= -1
    R_pytorch3d[..., :, :2] *= -1
    tvec = T_pytorch3d
    R = R_pytorch3d.transpose(-1, -2)

    return R, tvec, focal_length_px, principal_point_px
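
# Worked example, assuming "ndc_norm_image_bounds" intrinsics and a 640x480
# image: rescale = (320.0, 240.0), so focal_px = focal_ndc * rescale and
# principal_point_px = (320.0, 240.0) - principal_point_ndc * rescale.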


SPLITS = ["train", "valid", "test"]


def process_split_data(args):
    """
    Process all frames for a given split.

    Reads the frame annotation file for the split, groups frames by sequence
    and camera, and for each frame loads the image, depth map, and optical
    flow (where available), computes the camera intrinsics and pose (via
    _get_pytorch3d_camera), and saves the data.
    """
    split, root_dir, out_dir = args
    split_dir = osp.join(root_dir, split)

    frame_annotations_file = osp.join(split_dir, f"frame_annotations_{split}.jgz")
    with gzip.open(frame_annotations_file, "rt", encoding="utf8") as zipfile:
        frame_annots_list = load_dataclass(zipfile, List[DynamicReplicaFrameAnnotation])

    # Group annotations by sequence, then by camera ("left"/"right").
    seq_annot = defaultdict(lambda: defaultdict(list))
    for frame_annot in frame_annots_list:
        seq_annot[frame_annot.sequence_name][frame_annot.camera_name].append(frame_annot)

    for seq_name in tqdm(seq_annot.keys(), desc=f"Processing split '{split}'"):
        for cam in ["left", "right"]:
            out_img_dir = osp.join(out_dir, split, seq_name, cam, "rgb")
            out_depth_dir = osp.join(out_dir, split, seq_name, cam, "depth")
            out_fflow_dir = osp.join(out_dir, split, seq_name, cam, "flow_forward")
            out_bflow_dir = osp.join(out_dir, split, seq_name, cam, "flow_backward")
            out_cam_dir = osp.join(out_dir, split, seq_name, cam, "cam")
            for d in (out_img_dir, out_depth_dir, out_fflow_dir, out_bflow_dir, out_cam_dir):
                os.makedirs(d, exist_ok=True)

            for framedata in tqdm(
                seq_annot[seq_name][cam], desc=f"Seq {seq_name} [{cam}]", leave=False
            ):
                timestamp = framedata.frame_timestamp
                im_path = osp.join(split_dir, framedata.image.path)
                depth_path = osp.join(split_dir, framedata.depth.path)
                assert os.path.isfile(im_path), im_path
                assert os.path.isfile(depth_path), depth_path

                # Some frames (e.g., at sequence boundaries) have no flow;
                # guard against None before indexing the annotation dicts.
                has_fflow = (
                    framedata.flow_forward is not None
                    and framedata.flow_forward["path"]
                )
                has_bflow = (
                    framedata.flow_backward is not None
                    and framedata.flow_backward["path"]
                )

                if has_fflow:
                    flow_forward_path = osp.join(
                        split_dir, framedata.flow_forward["path"]
                    )
                    flow_forward_mask_path = osp.join(
                        split_dir, framedata.flow_forward_mask["path"]
                    )
                    assert os.path.isfile(flow_forward_path), flow_forward_path
                    assert os.path.isfile(flow_forward_mask_path), flow_forward_mask_path
                    flow_forward = cv2.imread(flow_forward_path, cv2.IMREAD_UNCHANGED)
                    flow_forward_mask = cv2.imread(
                        flow_forward_mask_path, cv2.IMREAD_UNCHANGED
                    )
                    np.savez(
                        osp.join(out_fflow_dir, f"{timestamp}.npz"),
                        flow=flow_forward,
                        mask=flow_forward_mask,
                    )

                if has_bflow:
                    flow_backward_path = osp.join(
                        split_dir, framedata.flow_backward["path"]
                    )
                    flow_backward_mask_path = osp.join(
                        split_dir, framedata.flow_backward_mask["path"]
                    )
                    assert os.path.isfile(flow_backward_path), flow_backward_path
                    assert os.path.isfile(flow_backward_mask_path), flow_backward_mask_path
                    flow_backward = cv2.imread(flow_backward_path, cv2.IMREAD_UNCHANGED)
                    flow_backward_mask = cv2.imread(
                        flow_backward_mask_path, cv2.IMREAD_UNCHANGED
                    )
                    np.savez(
                        osp.join(out_bflow_dir, f"{timestamp}.npz"),
                        flow=flow_backward,
                        mask=flow_backward_mask,
                    )

                depth = _load_16big_png_depth(depth_path)

                R, t, focal, pp = _get_pytorch3d_camera(
                    framedata.viewpoint, framedata.image.size
                )
                intrinsics = np.eye(3)
                intrinsics[0, 0] = focal[0].item()
                intrinsics[1, 1] = focal[1].item()
                intrinsics[0, 2] = pp[0].item()
                intrinsics[1, 2] = pp[1].item()

                # Camera-to-world pose: invert the world-to-camera extrinsic [R | t].
                pose = np.eye(4)
                pose[:3, :3] = R.numpy().T
                pose[:3, 3] = -R.numpy().T @ t.numpy()

                out_img_path = osp.join(out_img_dir, f"{timestamp}.png")
                out_depth_path = osp.join(out_depth_dir, f"{timestamp}.npy")
                out_cam_path = osp.join(out_cam_dir, f"{timestamp}.npz")

                shutil.copy(im_path, out_img_path)
                np.save(out_depth_path, depth)
                np.savez(out_cam_path, intrinsics=intrinsics, pose=pose)
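
# Minimal sketch of reading one processed frame back (paths follow the layout
# this script writes; `out_dir`, `split`, `seq`, and `ts` are placeholders):
#   cam = np.load(osp.join(out_dir, split, seq, "left", "cam", f"{ts}.npz"))
#   K, pose = cam["intrinsics"], cam["pose"]  # 3x3 intrinsics, 4x4 camera-to-world
#   depth = np.load(osp.join(out_dir, split, seq, "left", "depth", f"{ts}.npy"))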


def main():
    parser = argparse.ArgumentParser(
        description="Preprocess Dynamic Replica dataset: convert raw annotations, "
        "images, depth, and flow files to a processed format."
    )
    parser.add_argument(
        "--root_dir",
        type=str,
        required=True,
        help="Root directory of the Dynamic Replica data.",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        required=True,
        help="Output directory for processed data.",
    )
    parser.add_argument(
        "--splits",
        type=str,
        nargs="+",
        default=SPLITS,
        help="List of splits to process (default: train valid test).",
    )
    parser.add_argument(
        "--num_processes",
        type=int,
        default=cpu_count(),
        help="Number of processes to use (default: number of CPU cores).",
    )
    args = parser.parse_args()

    os.makedirs(args.out_dir, exist_ok=True)
    tasks = [(split, args.root_dir, args.out_dir) for split in args.splits]

    print("Processing splits:", args.splits)
    with Pool(processes=args.num_processes) as pool:
        list(
            tqdm(
                pool.imap(process_split_data, tasks),
                total=len(tasks),
                desc="Overall Progress",
            )
        )


if __name__ == "__main__":
    main()