|
|
|
|
|
""" |
|
|
HOI4D Preprocessing Script

This script processes HOI4D data by:
  1. Searching specific subdirectories for RGB and depth images.
  2. Reading camera intrinsics from a .npy file (one per high-level scene).
  3. Rescaling the RGB images and depth maps to a fixed output resolution
     (e.g., 640x480) using the 'cropping' module.
  4. Saving results (RGB, .npy depth, .npz camera intrinsics) in a new directory structure.

Usage:
    python preprocess_hoi4d.py \
        --root_dir /path/to/HOI4D_release \
        --cam_root /path/to/camera_params \
        --out_dir /path/to/processed_hoi4d
|
|
""" |
|
|
|
|
|
import os |
|
|
import glob |
|
|
import cv2 |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
from tqdm import tqdm |
|
|
from concurrent.futures import ProcessPoolExecutor |
|
|
import argparse |
|
|
|
|
|
import src.dust3r.datasets.utils.cropping as cropping |
|
|
|
|
|
def parse_arguments():
    """
    Build the command-line interface for HOI4D preprocessing and parse it.

    Options:
        --root_dir     (required) HOI4D_release directory.
        --cam_root     (required) directory containing camera intrinsics.
        --out_dir      (required) directory that receives the processed files.
        --max_workers  (optional int) number of parallel workers; ``None``
                       when omitted, leaving the choice to the caller.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    cli = argparse.ArgumentParser(
        description="Preprocess HOI4D dataset by rescaling RGB and depth images."
    )

    # The three mandatory path options differ only in flag name and help text,
    # so declare them as data and register them in one loop.
    required_paths = (
        ("--root_dir", "Path to the HOI4D_release directory."),
        ("--cam_root", "Path to the directory containing camera intrinsics."),
        ("--out_dir", "Path to the directory where processed files will be saved."),
    )
    for flag, description in required_paths:
        cli.add_argument(flag, required=True, help=description)

    cli.add_argument(
        "--max_workers",
        type=int,
        default=None,
        help="Number of parallel workers. Default uses half of available CPU cores.",
    )

    return cli.parse_args()
|
|
|
|
|
def process_image(args):
    """
    Process a single image and depth map:
      - Loads the image (using PIL) and depth (using OpenCV).
      - Converts depth to float32 and divides by 1000 (mm to meters).
      - Rescales both using 'cropping.rescale_image_depthmap' with a target
        resolution of (640, 480).
      - Saves the rescaled image (to out_img_path), depth (.npy), and camera
        intrinsics (.npz, stored under the key 'intrinsics').

    Args:
        args (tuple): A tuple of:
            (img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics)
            It is a single tuple so the function can be used directly with
            ``executor.map``. ``intrinsics`` must provide ``.copy()`` (a copy
            is handed to the rescaler so the caller's array is never mutated).

    Returns:
        None. Errors raised while loading, rescaling or saving are printed to
        the console but do not stop the workflow.

    Raises:
        ValueError / TypeError: only if ``args`` cannot be unpacked into the
        six expected items (the unpacking happens before the guarded section).
    """
    img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics = args

    try:
        # Use the image as a context manager so its file handle is released
        # even when a later step (depth read, rescale, save) raises.
        with Image.open(img_path) as img:
            # IMREAD_ANYDEPTH keeps the stored bit depth instead of
            # converting the depth image to 8-bit.
            depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
            if depth is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError(f"Could not read depth image: {depth_path}")
            depth = depth.astype(np.float32) / 1000.0

            img_rescaled, depth_rescaled, intrinsics_rescaled = cropping.rescale_image_depthmap(
                img, depth, intrinsics.copy(), (640, 480)
            )

            # Save while the source image is still open, in case the rescaler
            # hands back an object that still depends on it.
            img_rescaled.save(out_img_path)
            np.save(out_depth_path, depth_rescaled)
            np.savez(out_cam_path, intrinsics=intrinsics_rescaled)

    except Exception as e:
        # Deliberate best-effort: one bad frame must not abort the whole batch.
        print(f"Error processing {img_path}: {e}")
|
|
|
|
|
def main():
    """
    Entry point: collect every pending (RGB, depth) frame under the HOI4D
    root and process them in parallel.

    Steps:
      1. Parse the command-line arguments.
      2. Create the output root directory.
      3. Build the list of per-frame tasks (see ``_collect_tasks``).
      4. Run ``process_image`` over the tasks in a process pool.

    Returns:
        None.
    """
    args = parse_arguments()

    root = args.root_dir
    cam_root = args.cam_root
    out_dir = args.out_dir
    # exist_ok=True already covers the "directory exists" case.
    os.makedirs(out_dir, exist_ok=True)

    tasks = _collect_tasks(root, cam_root, out_dir)
    if not tasks:
        # Nothing pending: avoid starting worker processes for no work.
        print("No images to process.")
        return

    max_workers = args.max_workers
    if max_workers is None:
        # os.cpu_count() may return None when the count cannot be determined;
        # fall back to a single worker instead of failing on `None // 2`.
        max_workers = max(1, (os.cpu_count() or 1) // 2)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # list(...) drains the lazy map iterator so tqdm advances per result.
        list(tqdm(
            executor.map(process_image, tasks),
            total=len(tasks),
            desc="Processing images"
        ))


def _collect_tasks(root, cam_root, out_dir):
    """
    Scan the dataset tree and build one ``process_image`` task per pending frame.

    Scene directories are matched with the glob pattern
    ``<root>/ZY2021*/H*/C*/N*/S*/s*/T*``. For each scene:
      - the camera file ``<cam_root>/<first path component>/intrin.npy`` is
        loaded; scenes without it are skipped with a warning;
      - ``rgb``, ``depth`` and ``cam`` output folders are created under
        ``<out_dir>/<scene path components joined by '_'>``;
      - every ``align_rgb/*.jpg`` is paired with ``align_depth/<same stem>.png``.

    Frames whose three output files all exist already are skipped, so an
    interrupted run can be resumed.

    Args:
        root (str): HOI4D release directory.
        cam_root (str): Directory holding the per-top-level camera intrinsics.
        out_dir (str): Output root directory.

    Returns:
        list[tuple]: Tuples of (img_path, depth_path, out_img_path,
        out_depth_path, out_cam_path, intrinsics).
    """
    scene_pattern = os.path.join(root, "ZY2021*", "H*", "C*", "N*", "S*", "s*", "T*")
    # glob returns entries in arbitrary order; sort for a reproducible task order.
    scene_dirs = sorted(glob.glob(scene_pattern))

    tasks = []
    for scene_dir in tqdm(scene_dirs, desc="Collecting scenes"):
        scene_parts = os.path.relpath(scene_dir, root).split(os.sep)
        scene_name = "_".join(scene_parts)

        # Intrinsics are looked up by the first component of the scene path.
        cam_file = os.path.join(cam_root, scene_parts[0], "intrin.npy")
        if not os.path.isfile(cam_file):
            print(f"Warning: Camera file not found: {cam_file}. Skipping {scene_dir}")
            continue
        intrinsics = np.load(cam_file)

        rgb_dir = os.path.join(scene_dir, "align_rgb")
        depth_dir = os.path.join(scene_dir, "align_depth")

        out_rgb_dir = os.path.join(out_dir, scene_name, "rgb")
        out_depth_dir = os.path.join(out_dir, scene_name, "depth")
        out_cam_dir = os.path.join(out_dir, scene_name, "cam")
        for directory in (out_rgb_dir, out_depth_dir, out_cam_dir):
            os.makedirs(directory, exist_ok=True)

        for img_path in sorted(glob.glob(os.path.join(rgb_dir, "*.jpg"))):
            basename = os.path.splitext(os.path.basename(img_path))[0]
            depth_path = os.path.join(depth_dir, f"{basename}.png")

            out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
            out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
            out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

            # Skip only when all three outputs exist; a partially written
            # frame is reprocessed.
            if all(os.path.exists(p) for p in (out_img_path, out_depth_path, out_cam_path)):
                continue

            tasks.append((
                img_path,
                depth_path,
                out_img_path,
                out_depth_path,
                out_cam_path,
                intrinsics,
            ))

    return tasks
|
|
|
|
|
|
|
|
# Run the preprocessing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|