# LiDAR-Perfect-Depth — code/ppd/data/hypersim_lpd.py
"""
Hypersim adapter for the HarrisonPENG/hypersim mirror layout we downloaded.
Each scene untars to:
<data_root>/<scene>/cam_NN/<FFFFFF>_rgb.png
<data_root>/<scene>/cam_NN/<FFFFFF>_depth.npy
<data_root>/<scene>/cam_NN/<FFFFFF>_cam.npz
PPD's stock `ppd/datasets/hypersim/metadata_splits_filtered_train.json` lists
paths in the omrastogi/Hypersim-Processed format
(`train/<scene>/rgb_cam_NN_frFFFF.png` etc.) — we translate on the fly so the
same split file can be reused without regenerating.
A scene that wasn't shipped in the HarrisonPENG mirror is silently dropped
during enumeration (we have 449/456 of the metadata's scenes).
"""
from __future__ import annotations
import json
import os
import re
import time
import cv2
import numpy as np
from ppd.data.depth_estimation import Dataset as BaseDataset
from ppd.utils.logger import Log
_NAME_RE = re.compile(r".*?(?:rgb|depth_plane)_cam_(\d+)_fr(\d+)\.png$")
def _to_harrison_paths(scene: str, name: str) -> tuple[str, str] | None:
"""Translate omrastogi-style filename → (rgb_rel, depth_rel) under <scene>/cam_NN/."""
m = _NAME_RE.match(name)
if not m:
return None
cam_id, frame_id = m.group(1), m.group(2)
rgb = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_rgb.png"
depth = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_depth.npy"
return rgb, depth
class Dataset(BaseDataset):
    """Hypersim with HarrisonPENG/hypersim-style files (.npy depth)."""

    def build_metas(self):
        """Build `self.rgb_files` / `self.depth_files` from the stock split JSON.

        Paths in the split file use the omrastogi/Hypersim-Processed layout and
        are translated on the fly; samples whose files are absent on disk
        (scenes not shipped in the HarrisonPENG mirror) are dropped with one
        summary warning.
        """
        self.dataset_name = "hypersim"
        # `with` closes the split file promptly (the original leaked the handle).
        with open(self.cfg.split_path) as f:
            splits = json.load(f)
        rgb_paths = splits[f"{self.cfg.split}_rgb_paths"]
        dpt_paths = splits[f"{self.cfg.split}_dpt_paths"]
        assert len(rgb_paths) == len(dpt_paths)
        root = self.cfg.data_root
        self.rgb_files: list[str] = []
        self.depth_files: list[str] = []
        missing_scenes: set[str] = set()
        # dpt_paths is redundant here: both relative paths are derived from the
        # rgb entry, so iterate rgb_paths alone (lengths asserted equal above).
        for rp in rgb_paths:
            # rp like "train/ai_001_001/rgb_cam_00_fr0000.png"
            parts = rp.split("/")
            if len(parts) < 3:
                continue
            scene = parts[1]
            name = parts[-1]
            translated = _to_harrison_paths(scene, name)
            if translated is None:
                continue
            rgb_rel, dpt_rel = translated
            rgb_full = os.path.join(root, rgb_rel)
            dpt_full = os.path.join(root, dpt_rel)
            if not os.path.exists(rgb_full) or not os.path.exists(dpt_full):
                missing_scenes.add(scene)
                continue
            self.rgb_files.append(rgb_full)
            self.depth_files.append(dpt_full)
        if missing_scenes:
            kept = len(self.rgb_files)
            Log.warn(
                f"hypersim_lpd: dropped {len(rgb_paths) - kept} samples from "
                f"{len(missing_scenes)} missing/unavailable scenes (e.g. {sorted(missing_scenes)[:3]})."
            )

    def read_rgb_name(self, index):
        """Return a unique sample name: "<scene>__cam_NN__FFFFFF_rgb.png"."""
        return "__".join(self.rgb_files[index].split("/")[-3:])

    def read_depth(self, index, depth=None):
        """Load the .npy depth map for *index*.

        Returns (depth, valid_mask): depth as float32 HxW, valid_mask as uint8
        where 1 marks finite depths in (0.1, 65.0). NaN/inf pixels are clamped
        to the maximum valid depth so downstream math stays finite.
        """
        path = self.depth_files[index]
        Log.debug(index, path)
        start = time.time()
        depth = np.load(path).astype(np.float32)
        # Squeeze a trailing singleton channel (HxWx1 -> HxW).
        if depth.ndim == 3 and depth.shape[-1] == 1:
            depth = depth[..., 0]
        # Capture the duration once so the logged value matches the threshold
        # check (the original called time.time() twice).
        elapsed = time.time() - start
        if elapsed > 1:
            Log.warn(f"Long depth read {path}: {elapsed:.1f}s")
        # isfinite covers both NaN and inf in a single pass over the map.
        finite = np.isfinite(depth)
        valid_mask = (depth > 0.1) & (depth < 65.0) & finite
        if not valid_mask.any():
            Log.warn(f"No valid mask in depth map: {path}")
        elif not finite.all():
            # Replace non-finite pixels with the max valid depth (same fill
            # value the original used for both its NaN and inf branches).
            depth[~finite] = depth[valid_mask].max()
        return depth, valid_mask.astype(np.uint8)