|
|
|
|
|
""" |
|
|
Usage: |
|
|
python preprocess_re10k.py --root_dir /path/to/train \ |
|
|
--info_dir /path/to/RealEstate10K/train \ |
|
|
--out_dir /path/to/processed_re10k |
|
|
""" |
|
|
|
|
|
import os |
|
|
import shutil |
|
|
import argparse |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
from tqdm import tqdm |
|
|
from concurrent.futures import ProcessPoolExecutor, as_completed |
|
|
|
|
|
|
|
|
def build_intrinsics(intrinsics_array, image_size):
    """
    Assemble a 3x3 camera intrinsics matrix expressed in pixel units.

    Args:
        intrinsics_array (np.ndarray): Sequence whose first four entries are
            [fx_rel, fy_rel, cx_rel, cy_rel]: focal lengths and principal
            point given as fractions of the image width / height. Any
            trailing entries are ignored.
        image_size (tuple): The (width, height) of the image.

    Returns:
        np.ndarray: A 3x3 float64 intrinsics matrix.
    """
    img_w, img_h = image_size
    rel_fx, rel_fy, rel_cx, rel_cy = intrinsics_array[:4]

    # x-quantities scale with the width, y-quantities with the height.
    return np.array(
        [
            [rel_fx * img_w, 0.0, rel_cx * img_w],
            [0.0, rel_fy * img_h, rel_cy * img_h],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )
|
|
|
|
|
|
|
|
def compute_pose(extrinsics_array):
    """
    Invert a flattened 3x4 extrinsic matrix after making it homogeneous.

    The 12 input values are reshaped to (3, 4), the row [0, 0, 0, 1] is
    appended, and the resulting 4x4 matrix is inverted.

    Args:
        extrinsics_array (np.ndarray): A 12-element array holding a 3x4
            rigid transform in row-major order.

    Returns:
        np.ndarray: The 4x4 inverse transform. If the input maps world to
            camera, the output maps camera to world, and vice versa.
    """
    top_rows = extrinsics_array.reshape(3, 4)
    bottom_row = np.array([[0, 0, 0, 1]])
    homogeneous = np.concatenate((top_rows, bottom_row), axis=0)
    return np.linalg.inv(homogeneous)
|
|
|
|
|
|
|
|
def process_frame(task):
    """
    Process a single frame described by one row of the sequence metadata.

    Steps:
        - Parse the timestamp, intrinsics, and extrinsics from the row.
        - Copy the frame image to the output RGB directory.
        - Write a .npz file containing the intrinsics matrix and the pose.

    A frame whose .npz file already exists is treated as done and skipped,
    which lets an interrupted run be resumed.

    Args:
        task (tuple): A tuple that contains
            (seq_dir, out_rgb_dir, out_cam_dir, raw_line), where raw_line is
            one metadata row laid out as
            [timestamp, 6 intrinsics values, 12 extrinsics values].

    Returns:
        str or None:
            A string with an error message if something fails; otherwise
            None on success (including the already-processed case).
    """
    seq_dir, out_rgb_dir, out_cam_dir, raw_line = task

    # Bound before the try block so the error message in the handler can
    # always be formatted, even when parsing the timestamp is what failed.
    timestamp = "<unparsed>"

    try:
        timestamp = int(raw_line[0])
        intrinsics_array = raw_line[1:7]
        extrinsics_array = raw_line[7:]

        img_name = f"{timestamp}.png"
        src_img_path = os.path.join(seq_dir, img_name)
        if not os.path.isfile(src_img_path):
            return f"Image file not found: {src_img_path}"

        out_img_path = os.path.join(out_rgb_dir, img_name)
        out_cam_path = os.path.join(out_cam_dir, f"{timestamp}.npz")

        # The .npz is written last, so its presence marks a finished frame.
        if os.path.isfile(out_cam_path):
            return None

        # Only the image dimensions are needed to scale the intrinsics.
        with Image.open(src_img_path) as img:
            width, height = img.size

        K = build_intrinsics(intrinsics_array, (width, height))
        pose = compute_pose(extrinsics_array)

        shutil.copyfile(src_img_path, out_img_path)
        np.savez(out_cam_path, intrinsics=K, pose=pose)

    except Exception as e:
        # Report instead of raising so one bad frame does not abort the
        # whole sequence in the caller.
        return f"Error processing frame for {seq_dir} at timestamp {timestamp}: {e}"

    return None
|
|
|
|
|
|
|
|
def process_sequence(seq, root_dir, info_dir, out_dir):
    """
    Process a single sequence.

    Reads the sequence's metadata .txt file (one row of intrinsics and
    extrinsics per frame, after one skipped header line), creates the
    output directories, and processes every frame in a process pool.
    Problems are reported through ``tqdm.write`` rather than raised, so a
    bad sequence or frame does not stop the remaining work.

    Args:
        seq (str): Name of the sequence.
        root_dir (str): Directory where the original sequence images
            (e.g., .png) are stored.
        info_dir (str): Directory containing the .txt file with camera
            metadata for this sequence.
        out_dir (str): Output directory where processed frames will be
            stored, under ``<out_dir>/<seq>/rgb`` and ``<out_dir>/<seq>/cam``.

    Returns:
        None
    """
    seq_dir = os.path.join(root_dir, seq)
    scene_info_path = os.path.join(info_dir, f"{seq}.txt")

    if not os.path.isfile(scene_info_path):
        tqdm.write(f"Metadata file not found for sequence {seq} - skipping.")
        return

    try:
        # ndmin=2 keeps a one-frame file as a single row; without it
        # loadtxt returns a 1-D array and iterating would yield scalars.
        scene_info = np.loadtxt(
            scene_info_path,
            delimiter=" ",
            dtype=np.float64,
            skiprows=1,
            ndmin=2,
        )
    except Exception as e:
        tqdm.write(f"Error reading scene info for {seq}: {e}")
        return

    if scene_info.size == 0:
        tqdm.write(f"No frames listed for sequence {seq} - skipping.")
        return

    out_seq_dir = os.path.join(out_dir, seq)
    out_rgb_dir = os.path.join(out_seq_dir, "rgb")
    out_cam_dir = os.path.join(out_seq_dir, "cam")
    os.makedirs(out_rgb_dir, exist_ok=True)
    os.makedirs(out_cam_dir, exist_ok=True)

    tasks = [(seq_dir, out_rgb_dir, out_cam_dir, line) for line in scene_info]

    # os.cpu_count() may return None; use half the cores, but at least one.
    max_workers = max(1, (os.cpu_count() or 1) // 2)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_frame, t) for t in tasks]
        for future in as_completed(futures):
            try:
                error_msg = future.result()
            except Exception as e:
                # A worker that raised (or a broken pool) is reported like
                # any other frame failure instead of aborting the run.
                error_msg = f"Frame task failed for sequence {seq}: {e}"
            if error_msg:
                tqdm.write(error_msg)
|
|
|
|
|
|
|
|
def main():
    """
    Command-line entry point.

    Parses ``--root_dir``, ``--info_dir`` and ``--out_dir``, collects every
    sub-directory of the root directory as a sequence, and processes the
    sequences one after another with a progress bar.

    Raises:
        FileNotFoundError: If the root or metadata directory does not exist.
        ValueError: If the root directory contains no sequence folders.
    """
    parser = argparse.ArgumentParser(
        description="Process video frames and associated camera metadata."
    )
    parser.add_argument(
        "--root_dir",
        required=True,
        help="Directory containing sequence folders with .png images.",
    )
    parser.add_argument(
        "--info_dir", required=True, help="Directory containing metadata .txt files."
    )
    parser.add_argument(
        "--out_dir", required=True, help="Output directory for processed data."
    )
    args = parser.parse_args()

    if not os.path.isdir(args.root_dir):
        raise FileNotFoundError(f"Root directory not found: {args.root_dir}")

    # Fail fast: without this check every sequence would be skipped one by
    # one with a "metadata file not found" message.
    if not os.path.isdir(args.info_dir):
        raise FileNotFoundError(f"Info directory not found: {args.info_dir}")

    # Sorted so the processing order is deterministic across runs.
    seqs = sorted(
        d
        for d in os.listdir(args.root_dir)
        if os.path.isdir(os.path.join(args.root_dir, d))
    )
    if not seqs:
        raise ValueError(f"No sequence folders found in {args.root_dir}.")

    for seq in tqdm(seqs, desc="Sequences"):
        process_sequence(seq, args.root_dir, args.info_dir, args.out_dir)
|
|
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
# NOTE(review): the guard presumably also matters for the process pool used
# in process_sequence, since worker processes may re-import this module.
if __name__ == "__main__":
    main()
|
|
|