# LiDAR-Perfect-Depth — code/ppd/data/hypersim_lpd.py
"""
Hypersim adapter for the HarrisonPENG/hypersim mirror layout we downloaded.
Each scene untars to:
<data_root>/<scene>/cam_NN/<FFFFFF>_rgb.png
<data_root>/<scene>/cam_NN/<FFFFFF>_depth.npy
<data_root>/<scene>/cam_NN/<FFFFFF>_cam.npz
PPD's stock `ppd/datasets/hypersim/metadata_splits_filtered_train.json` lists
paths in the omrastogi/Hypersim-Processed format
(`train/<scene>/rgb_cam_NN_frFFFF.png` etc.) — we translate on the fly so the
same split file can be reused without regenerating.
A scene that wasn't shipped in the HarrisonPENG mirror is silently dropped
during enumeration (we have 449/456 of the metadata's scenes).
"""
from __future__ import annotations
import json
import os
import re
import time
import cv2
import numpy as np
from ppd.data.depth_estimation import Dataset as BaseDataset
from ppd.utils.logger import Log
_NAME_RE = re.compile(r".*?(?:rgb|depth_plane)_cam_(\d+)_fr(\d+)\.png$")
def _to_harrison_paths(scene: str, name: str) -> tuple[str, str] | None:
"""Translate omrastogi-style filename → (rgb_rel, depth_rel) under <scene>/cam_NN/."""
m = _NAME_RE.match(name)
if not m:
return None
cam_id, frame_id = m.group(1), m.group(2)
rgb = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_rgb.png"
depth = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_depth.npy"
return rgb, depth
class Dataset(BaseDataset):
    """Hypersim with HarrisonPENG/hypersim-style files (.npy depth)."""

    def build_metas(self):
        """Build `self.rgb_files` / `self.depth_files` from the stock split JSON.

        Paths in the split file use the omrastogi/Hypersim-Processed layout and
        are translated on the fly; samples whose files are absent on disk
        (scenes not shipped in the HarrisonPENG mirror) are dropped with one
        summary warning.
        """
        self.dataset_name = "hypersim"
        # `with` closes the split file promptly (the original leaked the handle).
        with open(self.cfg.split_path) as f:
            splits = json.load(f)
        rgb_paths = splits[f"{self.cfg.split}_rgb_paths"]
        dpt_paths = splits[f"{self.cfg.split}_dpt_paths"]
        assert len(rgb_paths) == len(dpt_paths)
        root = self.cfg.data_root
        self.rgb_files: list[str] = []
        self.depth_files: list[str] = []
        missing_scenes: set[str] = set()
        # dpt_paths is redundant here: both relative paths are derived from the
        # rgb entry, so iterate rgb_paths alone (lengths asserted equal above).
        for rp in rgb_paths:
            # rp like "train/ai_001_001/rgb_cam_00_fr0000.png"
            parts = rp.split("/")
            if len(parts) < 3:
                continue
            scene = parts[1]
            name = parts[-1]
            translated = _to_harrison_paths(scene, name)
            if translated is None:
                continue
            rgb_rel, dpt_rel = translated
            rgb_full = os.path.join(root, rgb_rel)
            dpt_full = os.path.join(root, dpt_rel)
            if not os.path.exists(rgb_full) or not os.path.exists(dpt_full):
                missing_scenes.add(scene)
                continue
            self.rgb_files.append(rgb_full)
            self.depth_files.append(dpt_full)
        if missing_scenes:
            kept = len(self.rgb_files)
            Log.warn(
                f"hypersim_lpd: dropped {len(rgb_paths) - kept} samples from "
                f"{len(missing_scenes)} missing/unavailable scenes (e.g. {sorted(missing_scenes)[:3]})."
            )

    def read_rgb_name(self, index):
        """Return a unique sample name: "<scene>__cam_NN__FFFFFF_rgb.png"."""
        return "__".join(self.rgb_files[index].split("/")[-3:])

    def read_depth(self, index, depth=None):
        """Load the .npy depth map for *index*.

        Returns (depth, valid_mask): depth as float32 HxW, valid_mask as uint8
        where 1 marks finite depths in (0.1, 65.0). NaN/inf pixels are clamped
        to the maximum valid depth so downstream math stays finite.
        """
        path = self.depth_files[index]
        Log.debug(index, path)
        start = time.time()
        depth = np.load(path).astype(np.float32)
        # Squeeze a trailing singleton channel (HxWx1 -> HxW).
        if depth.ndim == 3 and depth.shape[-1] == 1:
            depth = depth[..., 0]
        # Capture the duration once so the logged value matches the threshold
        # check (the original called time.time() twice).
        elapsed = time.time() - start
        if elapsed > 1:
            Log.warn(f"Long depth read {path}: {elapsed:.1f}s")
        # isfinite covers both NaN and inf in a single pass over the map.
        finite = np.isfinite(depth)
        valid_mask = (depth > 0.1) & (depth < 65.0) & finite
        if not valid_mask.any():
            Log.warn(f"No valid mask in depth map: {path}")
        elif not finite.all():
            # Replace non-finite pixels with the max valid depth (same fill
            # value the original used for both its NaN and inf branches).
            depth[~finite] = depth[valid_mask].max()
        return depth, valid_mask.astype(np.uint8)