File size: 3,962 Bytes
436b829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Hypersim adapter for the HarrisonPENG/hypersim mirror layout we downloaded.

Each scene tarball extracts to:
    <data_root>/<scene>/cam_NN/<FFFFFF>_rgb.png
    <data_root>/<scene>/cam_NN/<FFFFFF>_depth.npy
    <data_root>/<scene>/cam_NN/<FFFFFF>_cam.npz

PPD's stock `ppd/datasets/hypersim/metadata_splits_filtered_train.json` lists
paths in the omrastogi/Hypersim-Processed format
(`train/<scene>/rgb_cam_NN_frFFFF.png` etc.) — we translate on the fly so the
same split file can be reused without regenerating.

A scene that wasn't shipped in the HarrisonPENG mirror is dropped during
enumeration and reported in a single summary warning (we have 449/456 of the
metadata's scenes).
"""
from __future__ import annotations

import json
import os
import re
import time

import cv2
import numpy as np

from ppd.data.depth_estimation import Dataset as BaseDataset
from ppd.utils.logger import Log


_NAME_RE = re.compile(r".*?(?:rgb|depth_plane)_cam_(\d+)_fr(\d+)\.png$")


def _to_harrison_paths(scene: str, name: str) -> tuple[str, str] | None:
    """Translate omrastogi-style filename → (rgb_rel, depth_rel) under <scene>/cam_NN/."""
    m = _NAME_RE.match(name)
    if not m:
        return None
    cam_id, frame_id = m.group(1), m.group(2)
    rgb = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_rgb.png"
    depth = f"{scene}/cam_{cam_id}/{int(frame_id):06d}_depth.npy"
    return rgb, depth


class Dataset(BaseDataset):
    """Hypersim with HarrisonPENG/hypersim-style files (.npy depth).

    Reads ``self.cfg.split_path``, ``self.cfg.split`` and ``self.cfg.data_root``
    and exposes two parallel lists, ``self.rgb_files`` and ``self.depth_files``,
    holding the full paths of the samples that exist on disk.
    """

    def build_metas(self):
        """Populate ``self.rgb_files`` / ``self.depth_files`` from the split file.

        The split JSON must contain ``<split>_rgb_paths`` and
        ``<split>_dpt_paths`` lists of equal length. Each RGB entry is
        translated to the on-disk layout under ``self.cfg.data_root``; a sample
        is kept only when both its RGB and depth files exist. Dropped samples
        are summarised in warnings, one per cause.
        """
        self.dataset_name = "hypersim"
        # Context manager so the split-file handle is closed deterministically.
        with open(self.cfg.split_path, encoding="utf-8") as fh:
            splits = json.load(fh)
        rgb_paths = splits[f"{self.cfg.split}_rgb_paths"]
        dpt_paths = splits[f"{self.cfg.split}_dpt_paths"]
        assert len(rgb_paths) == len(dpt_paths)

        root = self.cfg.data_root
        self.rgb_files: list[str] = []
        self.depth_files: list[str] = []
        missing_scenes: set[str] = set()
        missing_samples = 0  # translated fine, but a file is absent on disk
        unparsed_samples = 0  # split entry could not be translated at all
        # Only the RGB entry is consulted: the depth path is derived from it.
        for rp in rgb_paths:
            # rp like "train/ai_001_001/rgb_cam_00_fr0000.png"
            parts = rp.split("/")
            translated = (
                _to_harrison_paths(parts[1], parts[-1]) if len(parts) >= 3 else None
            )
            if translated is None:
                unparsed_samples += 1
                continue
            rgb_rel, dpt_rel = translated
            rgb_full = os.path.join(root, rgb_rel)
            dpt_full = os.path.join(root, dpt_rel)
            if not (os.path.exists(rgb_full) and os.path.exists(dpt_full)):
                missing_scenes.add(parts[1])
                missing_samples += 1
                continue
            self.rgb_files.append(rgb_full)
            self.depth_files.append(dpt_full)

        if missing_samples:
            Log.warn(
                f"hypersim_lpd: dropped {missing_samples} samples from "
                f"{len(missing_scenes)} missing/unavailable scenes (e.g. {sorted(missing_scenes)[:3]})."
            )
        if unparsed_samples:
            # Reported separately so these are not misattributed to missing scenes.
            Log.warn(
                f"hypersim_lpd: skipped {unparsed_samples} split entries whose "
                f"path could not be translated."
            )

    def read_rgb_name(self, index):
        """Return the last three ``/``-separated path components joined by ``__``.

        For paths built by ``build_metas`` this is
        ``<scene>__cam_NN__<FFFFFF>_rgb.png``.
        """
        return "__".join(self.rgb_files[index].split("/")[-3:])

    def read_depth(self, index, depth=None):
        """Load the depth map for sample ``index`` together with its validity mask.

        Args:
            index: Position in ``self.depth_files``.
            depth: Ignored; the value is always replaced by the array loaded
                from disk. Presumably kept to match the base-class signature.

        Returns:
            ``(depth, valid_mask)``: a float32 array (a trailing singleton
            channel is squeezed away) and a uint8 mask that is 1 where the
            depth is finite and strictly between 0.1 and 65.0. Thresholds are
            in whatever unit the ``.npy`` file stores (presumably meters;
            TODO confirm). Non-finite entries are overwritten so the returned
            array never contains NaN/inf.
        """
        path = self.depth_files[index]
        Log.debug(index, path)
        start = time.perf_counter()
        # astype() copies, so the in-place fixes below never touch a cached array.
        depth = np.load(path).astype(np.float32)
        if depth.ndim == 3 and depth.shape[-1] == 1:
            depth = depth[..., 0]
        elapsed = time.perf_counter() - start
        if elapsed > 1:
            Log.warn(f"Long depth read {path}: {elapsed:.1f}s")

        finite = np.isfinite(depth)
        valid_mask = finite & (depth > 0.1) & (depth < 65.0)
        if valid_mask.any():
            if not finite.all():
                # Replace NaN/inf with the largest valid depth.
                depth[~finite] = depth[valid_mask].max()
        else:
            Log.warn(f"No valid mask in depth map: {path}")
            # No valid reference value exists; zero the non-finite entries so
            # they cannot propagate NaN/inf into later arithmetic.
            depth[~finite] = 0.0
        return depth, valid_mask.astype(np.uint8)