""" Hypersim adapter for the HarrisonPENG/hypersim mirror layout we downloaded. Each scene tars to: //cam_NN/_rgb.png //cam_NN/_depth.npy //cam_NN/_cam.npz PPD's stock `ppd/datasets/hypersim/metadata_splits_filtered_train.json` lists paths in the omrastogi/Hypersim-Processed format (`train//rgb_cam_NN_frFFFF.png` etc.) — we translate on the fly so the same split file can be reused without regenerating. A scene that wasn't shipped in the HarrisonPENG mirror is silently dropped during enumeration (we have 449/456 of the metadata's scenes). """ from __future__ import annotations import json import os import re import time import cv2 import numpy as np from ppd.data.depth_estimation import Dataset as BaseDataset from ppd.utils.logger import Log _NAME_RE = re.compile(r".*?(?:rgb|depth_plane)_cam_(\d+)_fr(\d+)\.png$") def _to_harrison_paths(scene: str, name: str) -> tuple[str, str] | None: """Translate omrastogi-style filename → (rgb_rel, depth_rel) under /cam_NN/.""" m = _NAME_RE.match(name) if not m: return None cam_id, frame_id = m.group(1), m.group(2) rgb = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_rgb.png" depth = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_depth.npy" return rgb, depth class Dataset(BaseDataset): """Hypersim with HarrisonPENG/hypersim-style files (.npy depth).""" def build_metas(self): self.dataset_name = "hypersim" splits = json.load(open(self.cfg.split_path)) rgb_paths = splits[f"{self.cfg.split}_rgb_paths"] dpt_paths = splits[f"{self.cfg.split}_dpt_paths"] assert len(rgb_paths) == len(dpt_paths) root = self.cfg.data_root self.rgb_files: list[str] = [] self.depth_files: list[str] = [] missing_scenes: set[str] = set() kept = 0 for rp, _ in zip(rgb_paths, dpt_paths): # rp like "train/ai_001_001/rgb_cam_00_fr0000.png" parts = rp.split("/") if len(parts) < 3: continue scene = parts[1] name = parts[-1] translated = _to_harrison_paths(scene, name) if translated is None: continue rgb_rel, dpt_rel = translated rgb_full = os.path.join(root, rgb_rel) dpt_full = os.path.join(root, dpt_rel) if not os.path.exists(rgb_full) or not os.path.exists(dpt_full): missing_scenes.add(scene) continue self.rgb_files.append(rgb_full) self.depth_files.append(dpt_full) kept += 1 if missing_scenes: Log.warn( f"hypersim_lpd: dropped {len(rgb_paths) - kept} samples from " f"{len(missing_scenes)} missing/unavailable scenes (e.g. {sorted(missing_scenes)[:3]})." ) def read_rgb_name(self, index): return "__".join(self.rgb_files[index].split("/")[-3:]) def read_depth(self, index, depth=None): path = self.depth_files[index] Log.debug(index, path) start = time.time() depth = np.load(path).astype(np.float32) if depth.ndim == 3 and depth.shape[-1] == 1: depth = depth[..., 0] if (time.time() - start) > 1: Log.warn(f"Long depth read {path}: {time.time() - start:.1f}s") valid_mask = ( np.logical_and((depth > 0.1) & (depth < 65.0), ~np.isnan(depth)) & ~np.isinf(depth) ) if valid_mask.sum() == 0: Log.warn(f"No valid mask in depth map: {path}") if valid_mask.sum() != 0 and np.isnan(depth).sum() != 0: depth[np.isnan(depth)] = depth[valid_mask].max() if valid_mask.sum() != 0 and np.isinf(depth).sum() != 0: depth[np.isinf(depth)] = depth[valid_mask].max() return depth, valid_mask.astype(np.uint8)