updated for 2026

Browse files

Files changed (6) hide show

README.md +16 -5
hoho2025/example_solutions.py +70 -26
hoho2025/viz3d.py +396 -19
notebooks/{example.ipynb → example_2025.ipynb} +0 -0
notebooks/example_2026.ipynb +0 -0
requirements.txt +3 -2

README.md CHANGED Viewed

@@ -23,7 +23,12 @@ cd tools2025
 pip install -e .
 ```
-### Usage example
 ```python
 from datasets import load_dataset
@@ -41,7 +46,7 @@ def read_colmap_rec(colmap_data):
         rec = pycolmap.Reconstruction(tmpdir)
         return rec
-ds = load_dataset("usm3d/hoho25k", streaming=True, trust_remote_code=True)
 for a in ds['train']:
     break
@@ -50,7 +55,7 @@ fig, ax = plot_all_modalities(a)
 ## Now 3d
 fig3d = init_figure()
-plot_reconstruction(fig3d, read_colmap_rec(a['colmap_binary']))
 plot_wireframe(fig3d, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
 plot_bpo_cameras_from_entry(fig3d, a)
 fig3d
@@ -65,7 +70,7 @@ from hoho2025.example_solutions import predict_wireframe
 pred_vertices, pred_connections = predict_wireframe(a)
 fig3d = init_figure()
-plot_reconstruction(fig3d, read_colmap_rec(a['colmap_binary']))
 plot_wireframe(fig3d, pred_vertices, pred_connections, color='rgb(0, 0, 255)')
 fig3d
 ```
@@ -78,4 +83,10 @@ from hoho2025.metric_helper import hss
 score = hss(pred_vertices, pred_connections, a['wf_vertices'], a['wf_edges'], vert_thresh=0.5, edge_thresh=0.5)
 print (score)
-```

 pip install -e .
 ```
+### Usage example 2026
+See the [example notebook](notebooks/example_2026.ipynb).
 ```python
 from datasets import load_dataset
         rec = pycolmap.Reconstruction(tmpdir)
         return rec
+ds = load_dataset("usm3d/hoho22k_2026_trainval", streaming=True, trust_remote_code=True)
 for a in ds['train']:
     break
 ## Now 3d
 fig3d = init_figure()
+plot_reconstruction(fig3d, read_colmap_rec(a['colmap']))
 plot_wireframe(fig3d, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
 plot_bpo_cameras_from_entry(fig3d, a)
 fig3d
 pred_vertices, pred_connections = predict_wireframe(a)
 fig3d = init_figure()
+plot_reconstruction(fig3d, read_colmap_rec(a['colmap']))
 plot_wireframe(fig3d, pred_vertices, pred_connections, color='rgb(0, 0, 255)')
 fig3d
 ```
 score = hss(pred_vertices, pred_connections, a['wf_vertices'], a['wf_edges'], vert_thresh=0.5, edge_thresh=0.5)
 print (score)
+```
+### Usage example 2025
+See the [example notebook](notebooks/example_2025.ipynb).

hoho2025/example_solutions.py CHANGED Viewed

@@ -26,10 +26,39 @@ def read_colmap_rec(colmap_data):
         rec = pycolmap.Reconstruction(tmpdir)
         return rec
 def convert_entry_to_human_readable(entry):
     out = {}
     for k, v in entry.items():
-        if 'colmap' in k:
             out[k] = read_colmap_rec(v)
         elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't', 'depth']:
             out[k] = np.array(v)
@@ -272,7 +301,9 @@ def get_uv_depth(vertices: List[dict],
-def project_vertices_to_3d(uv: np.ndarray, depth_vert: np.ndarray, col_img: pycolmap.Image) -> np.ndarray:
     """
     Projects 2D vertex coordinates with associated depths to 3D world coordinates.
@@ -283,25 +314,40 @@ def project_vertices_to_3d(uv: np.ndarray, depth_vert: np.ndarray, col_img: pyco
     depth_vert : np.ndarray
         (N,) array of depth values for each vertex.
     col_img : pycolmap.Image
     Returns
     -------
     vertices_3d : np.ndarray
         (N, 3) array of vertex coordinates in 3D world space.
     """
     # Backproject to 3D local camera coordinates
     xy_local = np.ones((len(uv), 3))
-    K = col_img.camera.calibration_matrix()
     xy_local[:, 0] = (uv[:, 0] - K[0, 2]) / K[0, 0]
     xy_local[:, 1] = (uv[:, 1] - K[1, 2]) / K[1, 1]
-    # Get the 3D vertices
-    vertices_3d_local = xy_local * depth_vert[...,None]
-    # Create camera-to-world transformation matrix
     world_to_cam = np.eye(4)
-    world_to_cam[:3] = col_img.cam_from_world.matrix()
     cam_to_world = np.linalg.inv(world_to_cam)
     # Transform local 3D points to world coordinates
     vertices_3d_homogeneous = cv2.convertPointsToHomogeneous(vertices_3d_local)
     vertices_3d = cv2.transform(vertices_3d_homogeneous, cam_to_world)
@@ -354,7 +400,7 @@ def create_3d_wireframe_single_image(vertices: List[dict],
     uv, depth_vert = get_uv_depth(vertices, depth_fitted, depth_sparse, 10)
     # Backproject to 3D
-    vertices_3d = project_vertices_to_3d(uv, depth_vert, col_img)
     return vertices_3d
@@ -536,24 +582,19 @@ def get_sparse_depth(colmap_rec, img_id_substring, depth):
     points_xyz = np.array(points_xyz)  # (N, 3)
-    # 3) For each point, project via col_img.project_point()
     uv = []
     z_vals = []
     for xyz in points_xyz:
-        proj = found_img.project_point(xyz)  # returns (u, v) in image coords or None
-        if proj is not None:
-            u_i, v_i = proj
-            u_i = int(round(u_i))
-            v_i = int(round(v_i))
-            # Check in-bounds
-            if 0 <= u_i < W and 0 <= v_i < H:
-                uv.append((u_i, v_i))
-                # We'll compute depth as Z in camera coords
-                # from the world->cam transform col_img holds
-                mat4x4 = np.eye(4)
-                mat4x4[:3, :4] = found_img.cam_from_world.matrix()
-                p_cam =  mat4x4@ np.array([xyz[0], xyz[1], xyz[2], 1.0])
-                z_vals.append(p_cam[2] / p_cam[3])
     uv = np.array(uv, dtype=int)     # shape (M,2)
     z_vals = np.array(z_vals)        # shape (M,)
@@ -664,7 +705,10 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
                                                 good_entry['image_ids'],
                                                 good_entry['ade'] # Added ade20k segmentation
                                                 )):
-        colmap_rec = good_entry['colmap_binary']
         K = np.array(K)
         R = np.array(R)
         t = np.array(t)

         rec = pycolmap.Reconstruction(tmpdir)
         return rec
+def _cam_matrix_from_image(img):
+    """Split the ``cam_from_world`` pose of a pycolmap.Image into ``(R, t)``.
+    Returns a 3×3 rotation and a length-3 translation, both as new float64
+    arrays.  The ``rotation`` / ``translation`` attributes are tried first;
+    if either is missing, the 3×4 ``[R | t]`` result of ``matrix()`` is
+    sliced instead.
+    """
+    pose = img.cam_from_world
+    try:
+        rot, trans = pose.rotation.matrix(), pose.translation
+    except AttributeError:
+        # Older API: matrix() returns 3×4 [R | t]
+        rt = np.array(pose.matrix())
+        rot, trans = rt[:, :3], rt[:, 3]
+    return np.array(rot, dtype=np.float64), np.array(trans, dtype=np.float64)
+def _colmap_project_point(img, cam, xyz):
+    """Pinhole-project a 3-D world point into the pixel frame of ``img``.
+    Only the focal lengths and principal point taken from
+    ``cam.calibration_matrix()`` are applied; no distortion model is used.
+    Returns ``((u, v), depth_z)``, or ``None`` when the camera-frame depth is
+    zero or negative.  Stands in for the removed ``Image.project_point``.
+    """
+    rot, trans = _cam_matrix_from_image(img)
+    in_cam = rot @ np.asarray(xyz, dtype=np.float64) + trans
+    depth_z = in_cam[2]
+    if depth_z <= 0:
+        return None
+    intr = cam.calibration_matrix()
+    pixel = (
+        in_cam[0] / depth_z * intr[0, 0] + intr[0, 2],
+        in_cam[1] / depth_z * intr[1, 1] + intr[1, 2],
+    )
+    return pixel, depth_z
 def convert_entry_to_human_readable(entry):
     out = {}
     for k, v in entry.items():
+        if 'colmap' in k and k!= 'pose_only_in_colmap':
             out[k] = read_colmap_rec(v)
         elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't', 'depth']:
             out[k] = np.array(v)
+def project_vertices_to_3d(uv: np.ndarray, depth_vert: np.ndarray,
+                           col_img: pycolmap.Image,
+                           colmap_rec: pycolmap.Reconstruction = None) -> np.ndarray:
     """
     Projects 2D vertex coordinates with associated depths to 3D world coordinates.
     depth_vert : np.ndarray
         (N,) array of depth values for each vertex.
     col_img : pycolmap.Image
+    colmap_rec : pycolmap.Reconstruction, optional
+        Required for newer pycolmap versions where ``Image.camera`` no longer
+        exists.  Ignored if the old ``col_img.camera`` shortcut is available.
     Returns
     -------
     vertices_3d : np.ndarray
         (N, 3) array of vertex coordinates in 3D world space.
     """
+    # Obtain camera intrinsics — try the old shortcut first, then fall back to
+    # looking up the camera through the reconstruction.
+    try:
+        K = col_img.camera.calibration_matrix()
+    except AttributeError:
+        if colmap_rec is None:
+            raise AttributeError(
+                "col_img.camera is not available in this pycolmap version. "
+                "Pass colmap_rec to project_vertices_to_3d()."
+            )
+        K = colmap_rec.cameras[col_img.camera_id].calibration_matrix()
     # Backproject to 3D local camera coordinates
     xy_local = np.ones((len(uv), 3))
     xy_local[:, 0] = (uv[:, 0] - K[0, 2]) / K[0, 0]
     xy_local[:, 1] = (uv[:, 1] - K[1, 2]) / K[1, 1]
+    vertices_3d_local = xy_local * depth_vert[..., None]
+    # Build 4×4 world-to-cam matrix using the version-agnostic helper.
+    R, t = _cam_matrix_from_image(col_img)
     world_to_cam = np.eye(4)
+    world_to_cam[:3, :3] = R
+    world_to_cam[:3, 3] = t
     cam_to_world = np.linalg.inv(world_to_cam)
     # Transform local 3D points to world coordinates
     vertices_3d_homogeneous = cv2.convertPointsToHomogeneous(vertices_3d_local)
     vertices_3d = cv2.transform(vertices_3d_homogeneous, cam_to_world)
     uv, depth_vert = get_uv_depth(vertices, depth_fitted, depth_sparse, 10)
     # Backproject to 3D
+    vertices_3d = project_vertices_to_3d(uv, depth_vert, col_img, colmap_rec=colmap_rec)
     return vertices_3d
     points_xyz = np.array(points_xyz)  # (N, 3)
+    # 3) Project each 3D point into the image using the version-agnostic helper.
+    cam = colmap_rec.cameras[found_img.camera_id]
     uv = []
     z_vals = []
     for xyz in points_xyz:
+        result = _colmap_project_point(found_img, cam, xyz)
+        if result is None:
+            continue
+        (u_f, v_f), depth_z = result
+        u_i, v_i = int(round(u_f)), int(round(v_f))
+        if 0 <= u_i < W and 0 <= v_i < H:
+            uv.append((u_i, v_i))
+            z_vals.append(depth_z)
     uv = np.array(uv, dtype=int)     # shape (M,2)
     z_vals = np.array(z_vals)        # shape (M,)
                                                 good_entry['image_ids'],
                                                 good_entry['ade'] # Added ade20k segmentation
                                                 )):
+        if 'colmap' in good_entry:
+            colmap_rec = good_entry['colmap']
+        else:
+            colmap_rec = good_entry['colmap_binary']
         K = np.array(K)
         R = np.array(R)
         t = np.array(t)

hoho2025/viz3d.py CHANGED Viewed

@@ -34,8 +34,17 @@ def to_homogeneous(points):
 ### Plotting functions
-def init_figure(height: int = 800) -> go.Figure:
-    """Initialize a 3D figure."""
     fig = go.FigureWidget()
     axes = dict(
         visible=False,
@@ -45,13 +54,22 @@ def init_figure(height: int = 800) -> go.Figure:
         showticklabels=True,
         autorange=True,
     )
     fig.update_layout(
         template="plotly_dark",
         height=height,
-        scene_camera=dict(
-            eye=dict(x=0., y=-.1, z=-2),
-            up=dict(x=0, y=-1., z=0),
-            projection=dict(type="orthographic")),
         scene=dict(
             xaxis=axes,
             yaxis=axes,
@@ -214,7 +232,6 @@ def plot_reconstruction(
     rgbs = []
     # Iterate over rec.points3D
     for k, p3D in rec.points3D.items():
-        #print (p3D)
         xyzs.append(p3D.xyz)
         rgbs.append(p3D.color)
@@ -246,7 +263,7 @@ def plot_wireframe(
         color: str = 'rgb(0, 0, 255)',
         name: Optional[str] = None,
         **kwargs):
-    """Plot a camera as a cone with camera frustum."""
     gt_vertices = np.array(vertices)
     gt_connections = np.array(edges)
     if gt_vertices is not None:
@@ -267,21 +284,381 @@ def plot_wireframe(
                 plot_lines_3d(fig, np.array(gt_lines), color, ps=4)
-def plot_bpo_cameras_from_entry(fig: go.Figure, entry: dict, idx = None):
     def cam2world_to_world2cam(R, t):
-        rt = np.eye(4)
-        rt[:3,:3] = R
-        rt[:3,3] = t.reshape(-1)
-        rt = np.linalg.inv(rt)
-        return rt[:3,:3], rt[:3,3]
     for i in range(len(entry['R'])):
         if idx is not None and i != idx:
             continue
         K = np.array(entry['K'][i])
         R = np.array(entry['R'][i])
         t = np.array(entry['t'][i])
-        R, t = cam2world_to_world2cam(R, t)
-        plot_camera(fig, R, t, K)

 ### Plotting functions
+def init_figure(height: int = 800, reverse_gravity: bool = False) -> go.Figure:
+    """Initialize a 3D figure.
+    Args:
+        height: Figure height in pixels.
+        reverse_gravity: Set to ``True`` for the **2025** dataset, whose
+            coordinate frame has Y pointing *down* (the original SketchUp /
+            COLMAP convention before the 2026 re-orientation).  When ``False``
+            (default, for the **2026** dataset) the viewer is set up for a
+            standard Y-up world so that the roof wireframe appears right-side up.
+    """
     fig = go.FigureWidget()
     axes = dict(
         visible=False,
         showticklabels=True,
         autorange=True,
     )
+    if reverse_gravity:
+        # 2025 data: Y points down — look from below with Y-down up-vector.
+        scene_camera = dict(
+            eye=dict(x=0., y=-.1, z=-2.),
+            up=dict(x=0, y=-1., z=0),
+            projection=dict(type="orthographic"))
+    else:
+        # 2026 data: Y points up — standard bird's-eye view.
+        scene_camera = dict(
+            eye=dict(x=0., y=1.5, z=-3.),
+            up=dict(x=0, y=1., z=0),
+            projection=dict(type="orthographic"))
     fig.update_layout(
         template="plotly_dark",
         height=height,
+        scene_camera=scene_camera,
         scene=dict(
             xaxis=axes,
             yaxis=axes,
     rgbs = []
     # Iterate over rec.points3D
     for k, p3D in rec.points3D.items():
         xyzs.append(p3D.xyz)
         rgbs.append(p3D.color)
         color: str = 'rgb(0, 0, 255)',
         name: Optional[str] = None,
         **kwargs):
+    """Plot a wireframe with per-edge semantic colors."""
     gt_vertices = np.array(vertices)
     gt_connections = np.array(edges)
     if gt_vertices is not None:
                 plot_lines_3d(fig, np.array(gt_lines), color, ps=4)
+def plot_bpo_cameras_from_entry(
+        fig: go.Figure,
+        entry: dict,
+        idx: Optional[int] = None,
+        color: str = 'rgb(255, 128, 0)',
+        size: float = 1.0):
+    """Plot BPO (DAE) camera frustums for a dataset entry.
+    For every camera index the function reads ``entry['K']``, ``entry['R']``
+    and ``entry['t']``, inverts the rigid pose, and passes the result to
+    ``plot_camera``.
+    Cameras flagged as ``pose_only_in_colmap=True`` are skipped because their
+    K / R / t are all zeros and would cause a singular-matrix error.  The
+    ``pose_only_in_colmap`` key is optional; entries without it are handled
+    by the all-zero K check below.
+    Args:
+        fig: Figure the frustums are added to.
+        entry: Dataset entry providing ``K``, ``R``, ``t`` lists.
+        idx: If set, only the camera at this index is plotted.
+        color: Colour string forwarded to ``plot_camera``.
+        size: Frustum size forwarded to ``plot_camera``.
+    """
+    pose_only_flags = entry.get('pose_only_in_colmap', [])
     def cam2world_to_world2cam(R, t):
+        # Inverse of a rigid transform: R_inv = Rᵀ, t_inv = −Rᵀ t.
+        # Rᵀ(p_cam − t) → R_w2c = Rᵀ, t_w2c = −Rᵀ t
+        # NOTE(review): the helper name says cam→world in / world→cam out, while
+        # the formula above reads as world→cam in; the arithmetic is the same
+        # either way — confirm which convention entry['R'], entry['t'] and
+        # plot_camera actually use.
+        R = np.array(R, dtype=np.float64)
+        t = np.array(t, dtype=np.float64).reshape(3)
+        R_w2c = R.T
+        t_w2c = -R_w2c @ t
+        return R_w2c, t_w2c
     for i in range(len(entry['R'])):
         if idx is not None and i != idx:
             continue
+        # Skip cameras that exist only in COLMAP (zero K/R/t).
+        if i < len(pose_only_flags) and pose_only_flags[i]:
+            continue
         K = np.array(entry['K'][i])
+        # Guard against all-zero K from old loaders that may not set pose_only flags.
+        if np.allclose(K, 0.0):
+            continue
         R = np.array(entry['R'][i])
         t = np.array(entry['t'][i])
+        R_w2c, t_w2c = cam2world_to_world2cam(R, t)
+        plot_camera(fig, R_w2c, t_w2c, K, color=color, size=size)
+# ---------------------------------------------------------------------------
+# Depth + segmentation unprojection helpers
+# ---------------------------------------------------------------------------
+def _open_image_field(img_field):
+    """Return a PIL Image for an image field value, or ``None``.
+    Accepted inputs: a PIL Image (returned unchanged), a dict carrying a
+    ``"bytes"`` key, or raw ``bytes`` / ``bytearray``.  Anything else, a
+    missing payload, or a payload PIL cannot open gives ``None``.
+    """
+    import io as _io
+    from PIL import Image as PILImage
+    if img_field is None or isinstance(img_field, PILImage.Image):
+        return img_field
+    if isinstance(img_field, dict) and "bytes" in img_field:
+        payload = img_field["bytes"]
+    elif isinstance(img_field, (bytes, bytearray)):
+        payload = img_field
+    else:
+        payload = None
+    if payload is None:
+        return None
+    try:
+        return PILImage.open(_io.BytesIO(payload))
+    except Exception:
+        # Best-effort decode: an unreadable payload is reported as "no image".
+        return None
+def _resolve_skip_colors(skip_classes):
+    """
+    Look up the ``ade20k_color_mapping`` colour of every name in
+    *skip_classes* and return them stacked as a uint8 array (one row per
+    resolved name), or None when nothing could be resolved.
+    Each name is lower-cased, then matched in this order:
+      1. Exact key match      – 'sky'    → 'sky'
+      2. Semicolon-part match – 'window' → 'windowpane;window'
+         (first key, in mapping order, with a matching part)
+    Names that match neither rule are silently dropped.
+    """
+    from hoho2025.color_mappings import ade20k_color_mapping
+    resolved = []
+    for name in skip_classes:
+        wanted = name.lower()
+        if wanted in ade20k_color_mapping:
+            resolved.append(ade20k_color_mapping[wanted])
+            continue
+        for key, rgb in ade20k_color_mapping.items():
+            aliases = [part.strip() for part in key.split(';')]
+            if wanted in aliases:
+                resolved.append(rgb)
+                break
+    if not resolved:
+        return None
+    return np.array(resolved, dtype=np.uint8)
+def _unproject_depth(depth_pil, ade_rgb, K_np, R_np, t_np,
+                     target_size, depth_scale, max_depth, skip_colors_arr):
+    """
+    Shared unprojection core.  Returns (pts_world, r_ch, g_ch, b_ch) or None.
+    depth_pil — PIL depth image; only its first channel is used if it has several.
+    ade_rgb   — RGB segmentation array, or None (a constant grey of 180 is used).
+    K_np  — 3×3 intrinsics (modified in-place to match target_size).
+    R_np  — 3×3 cam_from_world rotation.
+    t_np  — (3,) cam_from_world translation.
+    target_size — (width, height) both images are resized to (nearest neighbour).
+    depth_scale — factor applied to the resized depth values.
+    max_depth   — pixels with scaled depth >= max_depth (or <= 0) are dropped.
+    skip_colors_arr — optional array of RGB rows; pixels whose segmentation
+                      colour equals any row are dropped.
+    None is returned when no pixel survives the depth / class filtering.
+    """
+    from PIL import Image as PILImage
+    W_t, H_t = target_size
+    W_d, H_d = depth_pil.size  # PIL (width, height)
+    # Rescale K from its native resolution (inferred from cx) to depth image size,
+    # then again to target_size.
+    # NOTE(review): native size is taken as (2*cx, 2*cy), i.e. this assumes the
+    # principal point sits at the image centre — confirm for the data in use.
+    w_K = K_np[0, 2] * 2.0
+    h_K = K_np[1, 2] * 2.0
+    # When cx or cy is not positive the first rescale is skipped and K is
+    # treated as already expressed at the depth image's resolution.
+    if w_K > 0 and h_K > 0:
+        K_np[0, 0] *= W_d / w_K;  K_np[1, 1] *= H_d / h_K
+        K_np[0, 2] *= W_d / w_K;  K_np[1, 2] *= H_d / h_K
+    K_np[0, 0] *= W_t / W_d;  K_np[1, 1] *= H_t / H_d
+    K_np[0, 2] *= W_t / W_d;  K_np[1, 2] *= H_t / H_d
+    depth_arr = np.array(depth_pil, dtype=np.float32)
+    # Multi-channel depth image: keep the first channel only.
+    if depth_arr.ndim == 3:
+        depth_arr = depth_arr[:, :, 0]
+    # Nearest-neighbour resize so depth values are never blended across edges.
+    depth_np = np.array(
+        PILImage.fromarray(depth_arr, mode='F').resize((W_t, H_t), PILImage.NEAREST),
+        dtype=np.float32) * depth_scale
+    if ade_rgb is not None:
+        ade_s = np.array(
+            PILImage.fromarray(ade_rgb).resize((W_t, H_t), PILImage.NEAREST),
+            dtype=np.uint8)
+    else:
+        # No segmentation available: colour every point the same grey.
+        ade_s = np.full((H_t, W_t, 3), 180, dtype=np.uint8)
+    valid = (depth_np > 0) & (depth_np < max_depth)
+    if skip_colors_arr is not None:
+        # A pixel is masked when all three channels equal any one skip colour.
+        class_mask = np.any(
+            np.all(ade_s[:, :, None, :] == skip_colors_arr[None, None, :, :], axis=-1),
+            axis=-1)
+        valid = valid & ~class_mask
+    if not valid.any():
+        return None
+    u_grid, v_grid = np.meshgrid(np.arange(W_t, dtype=np.float64),
+                                  np.arange(H_t, dtype=np.float64))
+    # Homogeneous pixel coordinates (u, v, 1) of the surviving pixels.
+    pix_h   = np.stack([u_grid[valid], v_grid[valid], np.ones(valid.sum())], axis=1)
+    # Back-project through K⁻¹ and scale each ray by its depth.
+    pts_cam = (pix_h @ np.linalg.inv(K_np).T) * depth_np[valid, None].astype(np.float64)
+    # Row-vector form: (p − t) @ R equals Rᵀ(p − t) applied to each point.
+    pts_world = (pts_cam - t_np.reshape(3)) @ R_np  # Rᵀ(p_cam − t)
+    return pts_world, ade_s[:, :, 0][valid], ade_s[:, :, 1][valid], ade_s[:, :, 2][valid]
+def _load_colmap_from_entry(entry):
+    """
+    Build a pycolmap.Reconstruction from the zipped COLMAP model stored in
+    entry['colmap'], falling back to entry['colmap_binary'] (old 2025 key
+    name) when the first is missing or empty.
+    A list payload is converted to bytes first.  The archive is unpacked into
+    a temporary directory that is removed again before returning.
+    Returns None if no usable bytes payload is present or if unpacking /
+    loading fails (a warning is printed in the latter case).
+    """
+    import io as _io
+    import zipfile
+    import tempfile
+    blob = entry.get("colmap") or entry.get("colmap_binary")
+    if blob is None:
+        return None
+    if isinstance(blob, list):
+        blob = bytes(blob)
+    is_bytes = isinstance(blob, (bytes, bytearray))
+    if not is_bytes or len(blob) == 0:
+        return None
+    try:
+        with tempfile.TemporaryDirectory() as workdir:
+            with zipfile.ZipFile(_io.BytesIO(blob), "r") as archive:
+                archive.extractall(workdir)
+            return pycolmap.Reconstruction(workdir)
+    except Exception as e:
+        print(f"Warning: could not load colmap from entry: {e}")
+        return None
+def plot_depth_and_segmentation_in_3d(
+        fig: go.Figure,
+        entry: dict,
+        idx: Optional[int] = None,
+        target_size: tuple = (128, 96),
+        depth_scale: float = 0.001,
+        max_depth: float = 64.0,
+        skip_classes: Optional[list] = None,
+        point_size: int = 2):
+    """Add one segmentation-coloured 3D point trace per depth map to ``fig``.
+    Camera parameters come from the BPO fields entry['K'], entry['R'] and
+    entry['t'].  Cameras flagged in ``pose_only_in_colmap`` or carrying an
+    all-zero K are left out.
+    In the 2026 format the depth/ade lists may be shorter than the camera
+    list because pose-only cameras have no depth/image files, so the n-th
+    depth image is paired with the n-th camera that survives the filter
+    above rather than with camera index n.
+    Args:
+        fig: Plotly figure created by init_figure().
+        entry: Dataset entry dict (2025 or 2026 format).
+        idx: If set, only process the depth image at this position in the
+             depth list (i.e. among non-pose-only cameras).
+        target_size: (width, height) to downscale before unprojection.
+        depth_scale: Multiply raw pixel values by this to get metres.
+        max_depth: Discard pixels deeper than this (metres).
+        skip_classes: ADE20k class names to exclude (e.g. ['sky', 'tree']).
+        point_size: Plotly marker size.
+    """
+    skip_colors_arr = None
+    if skip_classes:
+        skip_colors_arr = _resolve_skip_colors(skip_classes)
+    depths = entry.get("depth", []) or []
+    aes = entry.get("ade", []) or []
+    Ks = entry.get("K", [])
+    Rs = entry.get("R", [])
+    ts = entry.get("t", [])
+    pose_only = entry.get("pose_only_in_colmap", [])
+    image_ids = entry.get("image_ids", [])
+    # Indices into K/R/t of the cameras that own a depth/ade image.  Pose-only
+    # cameras are interleaved in K/R/t but missing from depth/ade, so a single
+    # shared counter cannot be used for both.
+    cams_with_depth = []
+    for cam_i in range(len(Ks)):
+        flagged = cam_i < len(pose_only) and pose_only[cam_i]
+        if flagged or np.allclose(Ks[cam_i], 0.0):
+            continue
+        cams_with_depth.append(cam_i)
+    for depth_pos, cam_idx in enumerate(cams_with_depth):
+        if depth_pos >= len(depths):
+            # Ran out of depth images; nothing further can be paired.
+            break
+        if idx is not None and depth_pos != idx:
+            continue
+        ade_field = aes[depth_pos] if depth_pos < len(aes) else None
+        depth_pil = _open_image_field(depths[depth_pos])
+        ade_pil = _open_image_field(ade_field)
+        if depth_pil is None:
+            continue
+        # K is copied because the unprojection helper rescales it in place.
+        K_np = np.array(Ks[cam_idx], dtype=np.float64).copy()
+        R_np = np.array(Rs[cam_idx], dtype=np.float64)
+        t_np = np.array(ts[cam_idx], dtype=np.float64).reshape(3)
+        ade_rgb = None
+        if ade_pil is not None:
+            ade_rgb = np.array(ade_pil.convert("RGB"), dtype=np.uint8)
+        result = _unproject_depth(
+            depth_pil, ade_rgb,
+            K_np=K_np, R_np=R_np, t_np=t_np,
+            target_size=target_size,
+            depth_scale=depth_scale,
+            max_depth=max_depth,
+            skip_colors_arr=skip_colors_arr,
+        )
+        if result is None:
+            continue
+        pts_world, r_ch, g_ch, b_ch = result
+        colors = [f"rgb({r},{g},{b})" for r, g, b in zip(r_ch, g_ch, b_ch)]
+        if cam_idx < len(image_ids):
+            label = image_ids[cam_idx]
+        else:
+            label = str(cam_idx)
+        trace = go.Scatter3d(
+            x=pts_world[:, 0],
+            y=pts_world[:, 1],
+            z=pts_world[:, 2],
+            mode="markers",
+            marker=dict(size=point_size, color=colors, line_width=0),
+            name=f"depth_{label}",
+            showlegend=False,
+        )
+        fig.add_trace(trace)
+def plot_depth_and_segmentation_in_3d_colmap(
+        fig: go.Figure,
+        entry: dict,
+        idx: Optional[int] = None,
+        target_size: tuple = (128, 96),
+        depth_scale: float = 0.001,
+        max_depth: float = 64.0,
+        skip_classes: Optional[list] = None,
+        point_size: int = 2):
+    """Unproject depth maps into 3D using camera poses from the stored COLMAP
+    reconstruction (entry['colmap'] or entry['colmap_binary']).
+    Unlike :func:`plot_depth_and_segmentation_in_3d`, this variant takes K, R
+    and t from the COLMAP reconstruction instead of entry['K'/'R'/'t'].
+    Only images that have a depth map are plotted: cameras flagged as
+    ``pose_only_in_colmap`` are skipped because they have no depth/ade entry,
+    and images whose id is not found in the reconstruction are skipped too.
+    The n-th depth image is paired with the n-th non-pose-only entry of
+    ``image_ids``; that id is then looked up among the COLMAP image names.
+    Prints a message and returns without plotting when the entry holds no
+    loadable COLMAP reconstruction.
+    Args:
+        fig: Plotly figure created by :func:`init_figure`.
+        entry: Dataset entry with keys ``colmap``/``colmap_binary``, ``depth``,
+               ``ade``, ``image_ids``, ``pose_only_in_colmap``.
+        idx: If set, only process the depth image at this position in the
+             depth list (i.e. among non-pose-only cameras).
+        target_size: ``(width, height)`` for downscaling before unprojection.
+        depth_scale: Scale factor to convert raw pixel values to metres.
+        max_depth: Discard pixels whose depth exceeds this value.
+        skip_classes: ADE20k class names to exclude (e.g. ``['sky', 'tree']``).
+        point_size: Plotly marker size.
+    """
+    skip_colors_arr = _resolve_skip_colors(skip_classes) if skip_classes else None
+    rec = _load_colmap_from_entry(entry)
+    if rec is None:
+        print("plot_depth_and_segmentation_in_3d_colmap: no colmap in entry")
+        return
+    depths    = entry.get("depth", []) or []
+    aes       = entry.get("ade",   []) or []
+    image_ids = entry.get("image_ids", [])
+    pose_only = entry.get("pose_only_in_colmap", [])
+    # Build img_id → (K, R, t) from the COLMAP reconstruction.
+    # Image names may be raw ("image_{img_id}_order_{order_id}.jpg") or
+    # anonymised hashes ("{img_id}.jpg") depending on the dataset version.
+    colmap_cam_map = {}
+    for _, img in rec.images.items():
+        parts = img.name.split('_')
+        img_id = parts[1] if len(parts) >= 2 else img.name.split('.')[0]
+        pose = img.cam_from_world
+        try:
+            R_c = pose.rotation.matrix()
+            t_c = pose.translation
+        except AttributeError:
+            # Older pycolmap API: matrix() returns 3×4 [R | t].
+            Rt = np.array(pose.matrix())
+            R_c, t_c = Rt[:, :3], Rt[:, 3]
+        K_c = rec.cameras[img.camera_id].calibration_matrix()
+        colmap_cam_map[img_id] = (
+            np.array(K_c, dtype=np.float64),
+            np.array(R_c, dtype=np.float64),
+            np.array(t_c, dtype=np.float64),
+        )
+    # Non-pose-only image IDs, in entry order — these have depth/ade entries.
+    non_po_ids = [
+        img_id for i, img_id in enumerate(image_ids)
+        if not (i < len(pose_only) and pose_only[i])
+    ]
+    for depth_pos, img_id in enumerate(non_po_ids):
+        if idx is not None and depth_pos != idx:
+            continue
+        if depth_pos >= len(depths):
+            break
+        depth_field = depths[depth_pos]
+        ade_field   = aes[depth_pos] if depth_pos < len(aes) else None
+        depth_pil = _open_image_field(depth_field)
+        ade_pil   = _open_image_field(ade_field)
+        if depth_pil is None:
+            continue
+        if img_id not in colmap_cam_map:
+            continue
+        K_c, R_c, t_c = colmap_cam_map[img_id]
+        ade_rgb = (np.array(ade_pil.convert("RGB"), dtype=np.uint8)
+                   if ade_pil is not None else None)
+        result = _unproject_depth(
+            depth_pil, ade_rgb,
+            # Copy K: the unprojection helper rescales it in place.
+            K_np=K_c.copy(),
+            R_np=R_c,
+            t_np=t_c,
+            target_size=target_size,
+            depth_scale=depth_scale,
+            max_depth=max_depth,
+            skip_colors_arr=skip_colors_arr,
+        )
+        if result is None:
+            continue
+        pts_world, r_ch, g_ch, b_ch = result
+        colors = [f"rgb({r},{g},{b})" for r, g, b in zip(r_ch, g_ch, b_ch)]
+        fig.add_trace(go.Scatter3d(
+            x=pts_world[:, 0],
+            y=pts_world[:, 1],
+            z=pts_world[:, 2],
+            mode="markers",
+            marker=dict(size=point_size, color=colors, line_width=0),
+            name=f"colmap_depth_{img_id}",
+            showlegend=False,
+        ))

notebooks/{example.ipynb → example_2025.ipynb} RENAMED Viewed

The diff for this file is too large to render. See raw diff

notebooks/example_2026.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 datasets
 huggingface-hub
 ipywidgets
@@ -6,9 +7,9 @@ numpy
 opencv-python
 Pillow
 plotly
-pycolmap
 scipy
 torch
 trimesh
 webdataset
-manifold3d # for metric computation

+# Python >= 3.10 required (see setup.py)
 datasets
 huggingface-hub
 ipywidgets
 opencv-python
 Pillow
 plotly
+pycolmap>=0.6
 scipy
 torch
 trimesh
 webdataset
+manifold3d  # for metric computation