Xallt
/

hoho2025-submission

Model card Files Files and versions

xet

Community

Xallt committed on Apr 11, 2025

Commit

7058f30

1 Parent(s): 10d8e71

Update solution

Browse files

Files changed (2) hide show

example_solutions_copy.py +333 -92
process_sample.py +5 -11

example_solutions_copy.py CHANGED Viewed

@@ -2,19 +2,69 @@ import io
 import tempfile
 import zipfile
 from collections import defaultdict
 from typing import List, Tuple
 import cv2
 import numpy as np
 import pycolmap
 from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
-from PIL import Image as PImage
 from scipy.spatial.distance import cdist
-def empty_solution():
     """Return a minimal valid solution, i.e. 2 vertices and 1 edge."""
-    return np.zeros((2, 3)), [(0, 1)]
 def read_colmap_rec(colmap_data):
@@ -59,23 +109,169 @@ def get_house_mask(ade20k_seg):
     return full_mask
-def point_to_segment_dist(pt, seg_p1, seg_p2):
-    """
-    Computes the Euclidean distance from pt to the line segment p1->p2.
-    pt, seg_p1, seg_p2: (x, y) as np.ndarray
-    """
     # If both endpoints are the same, just return distance to one of them
     if np.allclose(seg_p1, seg_p2):
         return np.linalg.norm(pt - seg_p1)
     seg_vec = seg_p2 - seg_p1
     pt_vec = pt - seg_p1
-    seg_len2 = seg_vec.dot(seg_vec)
-    t = max(0, min(1, pt_vec.dot(seg_vec) / seg_len2))
     proj = seg_p1 + t * seg_vec
     return np.linalg.norm(pt - proj)
-def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
     """
     Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
     For each connected component, we do a line fit with cv2.fitLine, then measure
@@ -190,11 +386,14 @@ def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
                     conn = tuple(sorted((vA, vB)))
                     connections.append(conn)
-    return vertices, connections
 def get_uv_depth(
-    vertices: List[dict],
     depth_fitted: np.ndarray,
     sparse_depth: np.ndarray,
     search_radius: int = 10,
@@ -211,7 +410,7 @@ def get_uv_depth(
     Parameters
     ----------
-    vertices : List[dict]
         Each dict must have "xy" at least, e.g. {"xy": (x, y), ...}
     depth_fitted : np.ndarray
         A 2D array (H, W), the dense (or corrected) depth for fallback.
@@ -229,7 +428,7 @@ def get_uv_depth(
     """
     # Collect each vertex's (x, y)
-    uv = np.array([vert["xy"] for vert in vertices], dtype=np.float32)
     # Convert to integer pixel coordinates (round or floor)
     uv_int = np.round(uv).astype(np.int32)
@@ -277,7 +476,7 @@ def get_uv_depth(
 def project_vertices_to_3d(
-    uv: np.ndarray, depth_vert: np.ndarray, col_img: pycolmap.Image
 ) -> np.ndarray:
     """
     Projects 2D vertex coordinates with associated depths to 3D world coordinates.
@@ -316,21 +515,21 @@ def project_vertices_to_3d(
 def create_3d_wireframe_single_image(
-    vertices: List[dict],
-    connections: List[Tuple[int, int]],
-    depth: PImage,
     colmap_rec: pycolmap.Reconstruction,
     img_id: str,
-    ade_seg: PImage,
-) -> np.ndarray:
     """
     Processes a single image view to generate 3D vertex coordinates from existing 2D vertices/edges.
     Parameters
     ----------
-    vertices : List[dict]
         List of 2D vertex dictionaries (e.g., {"xy": (x, y), "type": ...}).
-    connections : List[Tuple[int, int]]
         List of 2D edge connections (indices into the vertices list).
     depth : PIL.Image
         Initial dense depth map as a PIL Image.
@@ -353,7 +552,7 @@ def create_3d_wireframe_single_image(
         print(
             f"Warning: create_3d_wireframe_single_image called with insufficient vertices/connections for image {img_id}"
         )
-        return np.empty((0, 3))
     # Get fitted dense depth and sparse depth
     depth_fitted, depth_sparse, found_sparse, col_img = get_fitted_dense_depth(
@@ -366,35 +565,54 @@ def create_3d_wireframe_single_image(
     # Backproject to 3D
     vertices_3d = project_vertices_to_3d(uv, depth_vert, col_img)
-    return vertices_3d
-def merge_vertices_3d(vert_edge_per_image, th=0.5):
     """Merge vertices that are close to each other in 3D space and are of same types"""
     # Initialize structures to collect vertices and connections from all images
-    all_3d_vertices = []
-    connections_3d = []
-    all_indexes = []
     cur_start = 0
-    types = []
     # Combine vertices and update connection indices across all images
-    for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
-        types += [int(v["type"] == "apex") for v in vertices]
-        all_3d_vertices.append(vertices_3d)
-        connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
         cur_start += len(vertices_3d)
-    all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
     # Calculate distance matrix between all vertices
     distmat = cdist(all_3d_vertices, all_3d_vertices)
-    types = np.array(types).reshape(-1, 1)
-    same_types = cdist(types, types)
     # Create mask for vertices that should be merged (close in space and same type)
-    mask_to_merge = (distmat <= th) & (same_types == 0)
-    new_vertices = []
-    new_connections = []
     # Extract vertex indices to merge based on the mask
     to_merge = sorted(
@@ -426,11 +644,15 @@ def merge_vertices_3d(vert_edge_per_image, th=0.5):
     old_idx_to_new = {}
     count = 0
     for idxs in merged:
-        new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
         for idx in idxs:
             old_idx_to_new[idx] = count
         count += 1
-    new_vertices = np.array(new_vertices)
     # Update connections to use new vertex indices
     for conn in connections_3d:
@@ -438,29 +660,29 @@ def merge_vertices_3d(vert_edge_per_image, th=0.5):
         if new_con[0] == new_con[1]:
             continue
         if new_con not in new_connections:
-            new_connections.append(new_con)
-    return new_vertices, new_connections
-def prune_not_connected(all_3d_vertices, connections_3d, keep_largest=True):
     """
     Prune vertices not connected to anything. If keep_largest=True, also
     keep only the largest connected component in the graph.
     """
-    if len(all_3d_vertices) == 0:
         return np.array([]), []
     # adjacency
     adj = defaultdict(set)
-    for i, j in connections_3d:
-        adj[i].add(j)
-        adj[j].add(i)
     # keep only vertices that appear in at least one edge
     used_idxs = set()
-    for i, j in connections_3d:
-        used_idxs.add(i)
-        used_idxs.add(j)
     if not used_idxs:
         return np.empty((0, 3)), []
@@ -471,12 +693,13 @@ def prune_not_connected(all_3d_vertices, connections_3d, keep_largest=True):
         used_list = sorted(list(used_idxs))
         for new_id, old_id in enumerate(used_list):
             new_map[old_id] = new_id
-        new_vertices = np.array([all_3d_vertices[old_id] for old_id in used_list])
         new_conns = []
-        for i, j in connections_3d:
-            if i in used_idxs and j in used_idxs:
-                new_conns.append((new_map[i], new_map[j]))
-        return new_vertices, new_conns
     # Otherwise find the largest connected component:
     visited = set()
@@ -510,15 +733,16 @@ def prune_not_connected(all_3d_vertices, connections_3d, keep_largest=True):
     for new_id, old_id in enumerate(largest):
         new_map[old_id] = new_id
-    new_vertices = np.array([all_3d_vertices[old_id] for old_id in largest])
-    new_conns = []
-    for i, j in connections_3d:
-        if i in largest and j in largest:
-            new_conns.append((new_map[i], new_map[j]))
     # remove duplicates
-    new_conns = list(set([tuple(sorted(c)) for c in new_conns]))
-    return new_vertices, new_conns
 def get_sparse_depth(colmap_rec, img_id_substring, depth_shape):
@@ -530,7 +754,7 @@ def get_sparse_depth(colmap_rec, img_id_substring, depth_shape):
     H, W = depth_shape
     # 1) Find the matching COLMAP image
-    found_img: pycolmap.Image = None
     for img_id_c, col_img in colmap_rec.images.items():
         if img_id_substring in col_img.name:
             found_img = col_img
@@ -555,7 +779,11 @@ def get_sparse_depth(colmap_rec, img_id_substring, depth_shape):
     z_vals = []
     for xyz in points_xyz:
         proj = found_img.project_point(xyz)  # returns (u, v) in image coords or None
-        cur_res = np.array([found_img.camera.height, found_img.camera.width])
         exp_res = np.array([H, W])
         proj = proj * exp_res / cur_res
@@ -569,6 +797,8 @@ def get_sparse_depth(colmap_rec, img_id_substring, depth_shape):
                 # We'll compute depth as Z in camera coords
                 # from the world->cam transform col_img holds
                 mat4x4 = np.eye(4)
                 mat4x4[:3, :4] = found_img.cam_from_world.matrix()
                 p_cam = mat4x4 @ np.array([xyz[0], xyz[1], xyz[2], 1.0])
                 z_vals.append(p_cam[2] / p_cam[3])
@@ -661,7 +891,7 @@ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg):
     return depth_fitted, depth_sparse, True, col_img
-def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th=3.0):
     """
     Prune vertices that are too far from sparse point cloud
@@ -669,28 +899,36 @@ def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th=3.0):
     xyz_sfm = []
     for k, v in colmap_rec.points3D.items():
         xyz_sfm.append(v.xyz)
-    xyz_sfm = np.array(xyz_sfm)
-    distmat = cdist(all_3d_vertices, xyz_sfm)
-    mindist = distmat.min(axis=1)
     mask = mindist <= th
-    all_3d_vertices_new = all_3d_vertices[mask]
-    old_idx_survived = np.arange(len(all_3d_vertices))[mask]
-    new_idxs = np.arange(len(all_3d_vertices_new))
-    old_to_new_idx = dict(zip(old_idx_survived, new_idxs))
     connections_3d_new = [
-        (old_to_new_idx[conn[0]], old_to_new_idx[conn[1]])
-        for conn in connections_3d
-        if mask[conn[0]] and mask[conn[1]]
     ]
-    return all_3d_vertices_new, connections_3d_new
-def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
     """
     Predict 3D wireframe from a dataset entry.
     """
     good_entry = convert_entry_to_human_readable(entry)
-    vert_edge_per_image = {}
     for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(
         zip(
             good_entry["gestalt"],
@@ -712,14 +950,18 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
         # Get 2D vertices and edges first
-        vertices, connections = get_vertices_and_edges_from_segmentation(
             gest_seg_np, edge_th=10.0
         )
         # Check if we have enough to proceed
         if (len(vertices) < 2) or (len(connections) < 1):
             print(f"Not enough vertices or connections found in image {i}, skipping.")
-            vert_edge_per_image[i] = [], [], np.empty((0, 3))
             continue
         # Call the refactored function to get 3D points
@@ -727,19 +969,18 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
             vertices, connections, depth, colmap_rec, img_id, ade_seg
         )
         # Store original 2D vertices, connections, and computed 3D points
-        vert_edge_per_image[i] = vertices, connections, vertices_3d
     # Merge vertices from all images
-    all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 0.5)
-    all_3d_vertices_clean, connections_3d_clean = prune_not_connected(
-        all_3d_vertices, connections_3d, keep_largest=False
-    )
-    all_3d_vertices_clean, connections_3d_clean = prune_too_far(
-        all_3d_vertices_clean, connections_3d_clean, colmap_rec, th=4.0
-    )
-    if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
         print("Not enough vertices or connections in the 3D vertices")
         return empty_solution()
-    return all_3d_vertices_clean, connections_3d_clean

 import tempfile
 import zipfile
 from collections import defaultdict
+from dataclasses import dataclass
 from typing import List, Tuple
 import cv2
 import numpy as np
 import pycolmap
 from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
 from scipy.spatial.distance import cdist
@dataclass
class WireframePoint2D:
    """A wireframe vertex located in a single image."""

    # 2D image coordinates of the vertex.
    xy: np.ndarray
    # Label describing what kind of vertex this is.
    type: str


@dataclass(frozen=True)
class WireframeEdge:
    """An edge given as a pair of indices into a vertex list.

    The class is frozen so that instances are hashable; this lets callers
    de-duplicate edges with ``set()`` or use them as dictionary keys.
    """

    i1: int
    i2: int


@dataclass
class Wireframe2D:
    """Vertices and edges of a wireframe detected in one image."""

    vertices: List[WireframePoint2D]
    # Edge endpoints index into ``vertices``.
    edges: List[WireframeEdge]


@dataclass
class WireframePoint3D:
    """A wireframe vertex in 3D space."""

    # 3D position of the vertex.
    xyz: np.ndarray
    # Label describing what kind of vertex this is.
    type: str


@dataclass
class Wireframe2DWith3D:
    """A 2D wireframe bundled with the 3D positions computed for its vertices."""

    wireframe2d: Wireframe2D
    vertices_3d: List[WireframePoint3D]


@dataclass
class Wireframe3D:
    """Vertices and edges of a 3D wireframe."""

    vertices: List[WireframePoint3D]
    # Edge endpoints index into ``vertices``.
    edges: List[WireframeEdge]

    @property
    def vertices_np(self) -> np.ndarray:
        """Vertex positions stacked row-wise; shape ``(0, 3)`` when there are none."""
        if not self.vertices:
            # np.array([]) would collapse to shape (0,), losing the column axis.
            return np.empty((0, 3))
        return np.array([v.xyz for v in self.vertices])

    @property
    def edges_np(self) -> np.ndarray:
        """Edge index pairs as an ``(E, 2)`` array; shape ``(0, 2)`` when there are none."""
        if not self.edges:
            # Keep an integer, two-column result even without edges.
            return np.empty((0, 2), dtype=int)
        return np.array([[e.i1, e.i2] for e in self.edges])
def empty_solution() -> Wireframe3D:
    """Build the fallback wireframe: two untyped vertices at the origin joined by one edge."""
    origin_pair = [WireframePoint3D(xyz=np.zeros((3,)), type="") for _ in range(2)]
    single_edge = WireframeEdge(i1=0, i2=1)
    return Wireframe3D(vertices=origin_pair, edges=[single_edge])
 def read_colmap_rec(colmap_data):
     return full_mask
def point_to_segment_proj(pt, seg_p1, seg_p2):
    """
    Return the point of the segment seg_p1->seg_p2 that is closest to pt.

    pt, seg_p1, seg_p2: coordinate vectors of equal length (array-like).
    The result is always a float array with the same shape as seg_p1.
    """
    pt = np.array(pt, dtype=float)
    seg_p1 = np.array(seg_p1, dtype=float)
    seg_p2 = np.array(seg_p2, dtype=float)
    # A degenerate segment is a single point, so that point is the projection.
    # (Returning a distance here would hand callers a scalar instead of a point.)
    if np.allclose(seg_p1, seg_p2):
        return seg_p1
    seg_vec = seg_p2 - seg_p1
    pt_vec = pt - seg_p1
    seg_len2 = np.dot(seg_vec, seg_vec)
    # Clamp the line parameter so the projection stays between the endpoints.
    t = max(0.0, min(1.0, np.dot(pt_vec, seg_vec) / seg_len2))
    return seg_p1 + t * seg_vec
def point_to_segment_dist(pt, seg_p1, seg_p2):
    """
    Distance between pt and the line segment seg_p1->seg_p2.

    Computed as the Euclidean norm of pt minus the value returned by
    point_to_segment_proj for the same arguments.
    pt, seg_p1, seg_p2: (x, y) as np.ndarray
    """
    offset = pt - point_to_segment_proj(pt, seg_p1, seg_p2)
    return np.linalg.norm(offset)
def combine_segs(keys, gestalt_img) -> np.ndarray:
    """
    Build a boolean mask of the pixels whose colour matches any of the given keys.

    Each key is looked up in gestalt_color_mapping; a pixel matches when every
    channel lies within 0.5 of the mapped colour. The mask has the shape of the
    first two axes of gestalt_img.
    """
    combined = np.zeros(gestalt_img.shape[:2], dtype=bool)
    for name in keys:
        target = np.array(gestalt_color_mapping[name])
        lower, upper = target - 0.5, target + 0.5
        hits = cv2.inRange(gestalt_img, lower, upper)
        combined |= hits.astype(bool)
    return combined
def get_turn_angles(contour):
    """
    Compute the signed turning angle, in degrees, at every point of a closed contour.

    contour: array of shape (N, 1, 2); only contour[:, 0] is used.
    Returns an (N,) array. Entry i is the angle between the step arriving at
    point i (from point i-1, wrapping around) and the step leaving it (towards
    point i+1, wrapping around). A zero-length step contributes a zero vector,
    so the angle is 0 rather than NaN.
    """
    vcur = np.array(contour)[:, 0].astype(float)  # (N, 2)
    vprev = np.concatenate([vcur[-1, None], vcur[:-1]])  # (N, 2)
    vnext = np.concatenate([vcur[1:], vcur[0, None]])  # (N, 2)

    def unit(vecs):
        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
        # Avoid 0/0 for repeated points; the vector itself stays zero.
        norms[norms == 0] = 1.0
        return vecs / norms

    vecprev = unit(vcur - vprev)
    vecnext = unit(vnext - vcur)
    # Explicit 2D cross product (z component); np.cross on 2-vectors is
    # deprecated in NumPy 2.0.
    cross = vecprev[:, 0] * vecnext[:, 1] - vecprev[:, 1] * vecnext[:, 0]
    dot = (vecprev * vecnext).sum(axis=-1)
    return np.degrees(np.arctan2(cross, dot))
def slice_arr(arr, i, j):
    """
    Take arr[i:j], wrapping around the end of arr at most once.

    - i <= j <= len(arr): plain slice arr[i:j].
    - i <= j and j > len(arr): arr[i:] followed by arr[:j - len(arr)].
    - i > j: arr[i:] followed by arr[:j].
    """
    size = len(arr)
    if i <= j <= size:
        return arr[i:j]
    # Where the wrapped-around head stops depends on which rule triggered the wrap.
    head_stop = j - size if i <= j else j
    return np.concatenate([arr[i:], arr[:head_stop]])
def group_segments(segments):
    """
    Merge overlapping or touching (start, end) intervals.

    The intervals are sorted by start; an interval whose start is not greater
    than the end of the previously kept one is folded into it. Merged entries
    are tuples; entries that were never merged are returned as given.
    """
    merged = []
    for seg in sorted(segments, key=lambda s: s[0]):
        if merged and seg[0] <= merged[-1][1]:
            last = merged[-1]
            merged[-1] = (last[0], max(last[1], seg[1]))
        else:
            merged.append(seg)
    return merged
def get_contour_interesting_points_indices(
    contour, *, turn_threshold=70, window_divisor=20
):
    """
    Find indices of contour points where the outline makes a sharp turn.

    For every start index i, a window of consecutive points (wrapping around)
    is grown until its open arc length exceeds the closed contour length
    divided by window_divisor. If the cumulative turning angle inside the
    window exceeds turn_threshold degrees in absolute value (ignoring the
    first two entries), the span from i to the first such position is
    recorded. Overlapping spans are merged with group_segments and the
    midpoint index of each merged span (modulo the contour length) is returned.

    contour: array of shape (N, 1, 2) as used by cv2.arcLength.
    Returns a list of ints; empty for an empty or zero-length contour.
    """
    n_points = len(contour)
    if n_points == 0:
        return []
    angles = get_turn_angles(contour)
    angle_len = cv2.arcLength(contour, True) / window_divisor
    if not angle_len > 0:
        # Zero (or NaN) perimeter: no window could ever exceed the target
        # length, so the growth loop below would never terminate.
        return []
    skip = 2  # leading cumulative-turn entries that are ignored
    interesting_segments = []
    for i in range(len(angles)):
        j = i + 1
        # Grow the window until it is longer than angle_len. One full lap
        # (j == i + n_points + 1) already spans the whole perimeter, so the
        # bound only acts as a safety net.
        while (
            j <= i + n_points
            and cv2.arcLength(slice_arr(contour, i, j), False) <= angle_len
        ):
            j += 1
        turns = np.abs(np.cumsum(slice_arr(angles, i, j)))[skip:]
        sharp = np.where(turns > turn_threshold)[0]
        if len(sharp) > 0:
            interesting_segments.append((i, int(sharp[0]) + skip + i))
    grouped_segments = group_segments(interesting_segments)
    return [((i + j) // 2) % n_points for i, j in grouped_segments]
def get_contour_interesting_wireframe(contour) -> Tuple[np.ndarray, np.ndarray]:
    """
    Turn a contour into corner points plus the edges that follow the outline.

    Corners come from get_contour_interesting_points_indices. Consecutive
    corners (cyclically) are connected when the contour points on at least one
    of the two sides between them stay within 10% of the corner-to-corner
    distance from the straight segment joining them.

    contour: array of shape (N, 1, 2).
    Returns (points, connections): points is contour[indices, 0]; connections
    is an integer array of shape (E, 2) indexing into points, with no
    self-loops, no zero-length edges and no duplicate pairs.
    """
    indices = get_contour_interesting_points_indices(contour)
    points = contour[:, 0]
    n_corners = len(indices)
    factor = 0.1

    def max_deviation(side_points, p_start, p_end):
        # Largest distance of the given contour points from the segment.
        return max(
            (point_to_segment_dist(p, p_start, p_end) for p in side_points),
            default=0,
        )

    connections = []
    seen_pairs = set()
    for i in range(n_corners):
        nxt = (i + 1) % n_corners
        i1, i2 = indices[i], indices[nxt]
        pair = (min(i, nxt), max(i, nxt))
        # With one corner the "next" corner is itself; with two corners the
        # cyclic walk visits the same pair twice. Skip both cases, as well as
        # corners that landed on the same contour point.
        if i == nxt or i1 == i2 or pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        p1, p2 = points[i1], points[i2]
        tolerance = np.linalg.norm(p1 - p2) * factor
        dist_side_1 = max_deviation(slice_arr(points, i1, i2), p1, p2)
        dist_side_2 = max_deviation(slice_arr(points, i2, i1), p2, p1)
        if dist_side_1 <= tolerance or dist_side_2 <= tolerance:
            connections.append((i, nxt))
    corner_points = points[np.array(indices, dtype=int)]
    # reshape keeps the (E, 2) layout even when no edge was accepted.
    return corner_points, np.array(connections, dtype=int).reshape(-1, 2)
def get_vertices_and_edges_from_segmentation_contours(
    gest_seg_np, edge_th=25.0
) -> Wireframe2D:
    """
    Extract a 2D wireframe from the contours of the roof-line classes.

    For each of the classes "eave", "ridge", "rake" and "valley" the matching
    pixels are masked with combine_segs, external contours are traced, and
    each contour is reduced to corner points and edges with
    get_contour_interesting_wireframe.

    Parameters
    ----------
    gest_seg_np : array-like
        Segmentation image; converted with np.array before use.
    edge_th : float
        Currently unused; kept so the signature matches
        get_vertices_and_edges_from_segmentation.

    Returns
    -------
    Wireframe2D
        Vertices carry the class of the contour they came from; edge indices
        refer to positions in the combined vertex list.
    """
    gest_seg_np = np.array(gest_seg_np)
    keys_segments = ["eave", "ridge", "rake", "valley"]

    # Remember which class every contour belongs to so that vertices can be
    # labelled correctly later on.
    labelled_contours = []
    for key in keys_segments:
        mask = combine_segs([key], gest_seg_np)
        contours, _ = cv2.findContours(
            mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS
        )
        labelled_contours.extend((key, contour) for contour in contours)

    all_vertices: list[WireframePoint2D] = []
    all_connections: list[WireframeEdge] = []
    for key, contour in labelled_contours:
        # Normalise orientation: reverse contours with negative signed area.
        if cv2.contourArea(contour, oriented=True) < 0:
            contour = contour[::-1]
        interesting_points, interesting_connections = get_contour_interesting_wireframe(
            contour
        )
        # Edge indices returned above are local to this contour; shift them by
        # the number of vertices collected so far.
        offset = len(all_vertices)
        all_vertices.extend(
            WireframePoint2D(xy=p, type=key) for p in interesting_points
        )
        all_connections.extend(
            WireframeEdge(i1=offset + int(i1), i2=offset + int(i2))
            for i1, i2 in interesting_connections
        )
    return Wireframe2D(all_vertices, all_connections)
+def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0) -> Wireframe2D:
     """
     Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
     For each connected component, we do a line fit with cv2.fitLine, then measure
                     conn = tuple(sorted((vA, vB)))
                     connections.append(conn)
+    vertices = [WireframePoint2D(xy=v["xy"], type=v["type"]) for v in vertices]
+    connections = [WireframeEdge(i1=c[0], i2=c[1]) for c in connections]
+    return Wireframe2D(vertices, connections)
 def get_uv_depth(
+    vertices: List[WireframePoint2D],
     depth_fitted: np.ndarray,
     sparse_depth: np.ndarray,
     search_radius: int = 10,
     Parameters
     ----------
+    vertices : List[WireframePoint2D]
         Each dict must have "xy" at least, e.g. {"xy": (x, y), ...}
     depth_fitted : np.ndarray
         A 2D array (H, W), the dense (or corrected) depth for fallback.
     """
     # Collect each vertex's (x, y)
+    uv = np.array([vert.xy for vert in vertices], dtype=np.float32)
     # Convert to integer pixel coordinates (round or floor)
     uv_int = np.round(uv).astype(np.int32)
 def project_vertices_to_3d(
+    uv: np.ndarray, depth_vert: np.ndarray, col_img
 ) -> np.ndarray:
     """
     Projects 2D vertex coordinates with associated depths to 3D world coordinates.
 def create_3d_wireframe_single_image(
+    vertices: List[WireframePoint2D],
+    connections: List[WireframeEdge],
+    depth,
     colmap_rec: pycolmap.Reconstruction,
     img_id: str,
+    ade_seg,
+) -> List[WireframePoint3D]:
     """
     Processes a single image view to generate 3D vertex coordinates from existing 2D vertices/edges.
     Parameters
     ----------
+    vertices : List[WireframePoint2D]
         List of 2D vertex dictionaries (e.g., {"xy": (x, y), "type": ...}).
+    connections : List[WireframeEdge]
         List of 2D edge connections (indices into the vertices list).
     depth : PIL.Image
         Initial dense depth map as a PIL Image.
         print(
             f"Warning: create_3d_wireframe_single_image called with insufficient vertices/connections for image {img_id}"
         )
+        return []
     # Get fitted dense depth and sparse depth
     depth_fitted, depth_sparse, found_sparse, col_img = get_fitted_dense_depth(
     # Backproject to 3D
     vertices_3d = project_vertices_to_3d(uv, depth_vert, col_img)
+    return [
+        WireframePoint3D(xyz=v, type=vertices[i].type)
+        for i, v in enumerate(vertices_3d)
+    ]
+def merge_vertices_3d(
+    vert_edge_per_image: dict[int, Wireframe2DWith3D], th=0.5
+) -> Wireframe3D:
     """Merge vertices that are close to each other in 3D space and are of same types"""
     # Initialize structures to collect vertices and connections from all images
+    all_3d_vertices_list: list[np.ndarray] = []
+    connections_3d: list[tuple[int, int]] = []
     cur_start = 0
+    types: list[int] = []
+    all_types_set: set[str] = set()
+    for _, wireframe2d_with_3d in vert_edge_per_image.items():
+        all_types_set.update([v.type for v in wireframe2d_with_3d.wireframe2d.vertices])
+    all_types = list(all_types_set)
+    type_idx_map = {t: i for i, t in enumerate(all_types)}
+    all_wireframe_points_3d: list[WireframePoint3D] = []
     # Combine vertices and update connection indices across all images
+    for cimg_idx, wireframe2d_with_3d in vert_edge_per_image.items():
+        vertices = wireframe2d_with_3d.wireframe2d.vertices
+        connections = wireframe2d_with_3d.wireframe2d.edges
+        vertices_3d: np.ndarray = np.array(
+            [v.xyz for v in wireframe2d_with_3d.vertices_3d]
+        )
+        types += [type_idx_map[v.type] for v in vertices]
+        all_wireframe_points_3d.extend(wireframe2d_with_3d.vertices_3d)
+        all_3d_vertices_list.append(vertices_3d)
+        connections_3d += [
+            (con.i1 + cur_start, con.i2 + cur_start) for con in connections
+        ]
         cur_start += len(vertices_3d)
+    all_3d_vertices = np.concatenate(all_3d_vertices_list, axis=0)
+    types_np = np.array(types)
     # Calculate distance matrix between all vertices
     distmat = cdist(all_3d_vertices, all_3d_vertices)
+    same_types = types_np[:, None] == types_np[None, :]
     # Create mask for vertices that should be merged (close in space and same type)
+    mask_to_merge = (distmat <= th) & same_types
+    new_vertices: list[WireframePoint3D] = []
+    new_connections: list[WireframeEdge] = []
     # Extract vertex indices to merge based on the mask
     to_merge = sorted(
     old_idx_to_new = {}
     count = 0
     for idxs in merged:
+        types_cur = [all_wireframe_points_3d[i].type for i in idxs]
+        assert len(set(types_cur)) == 1
+        new_vertices.append(
+            WireframePoint3D(xyz=all_3d_vertices[idxs].mean(axis=0), type=types_cur[0])
+        )
         for idx in idxs:
             old_idx_to_new[idx] = count
         count += 1
     # Update connections to use new vertex indices
     for conn in connections_3d:
         if new_con[0] == new_con[1]:
             continue
         if new_con not in new_connections:
+            new_connections.append(WireframeEdge(i1=new_con[0], i2=new_con[1]))
+    return Wireframe3D(new_vertices, new_connections)
+def prune_not_connected(wireframe_3d: Wireframe3D, keep_largest=True):
     """
     Prune vertices not connected to anything. If keep_largest=True, also
     keep only the largest connected component in the graph.
     """
+    if len(wireframe_3d.vertices) == 0:
         return np.array([]), []
     # adjacency
     adj = defaultdict(set)
+    for edge in wireframe_3d.edges:
+        adj[edge.i1].add(edge.i2)
+        adj[edge.i2].add(edge.i1)
     # keep only vertices that appear in at least one edge
     used_idxs = set()
+    for edge in wireframe_3d.edges:
+        used_idxs.add(edge.i1)
+        used_idxs.add(edge.i2)
     if not used_idxs:
         return np.empty((0, 3)), []
         used_list = sorted(list(used_idxs))
         for new_id, old_id in enumerate(used_list):
             new_map[old_id] = new_id
+        new_vertices = [wireframe_3d.vertices[i] for i in used_list]
         new_conns = []
+        for edge in wireframe_3d.edges:
+            if edge.i1 in used_idxs and edge.i2 in used_idxs:
+                new_conns.append(edge)
+        return Wireframe3D(new_vertices, new_conns)
     # Otherwise find the largest connected component:
     visited = set()
     for new_id, old_id in enumerate(largest):
         new_map[old_id] = new_id
+    new_vertices = [wireframe_3d.vertices[i] for i in largest]
+    new_conns = [
+        WireframeEdge(i1=new_map[edge.i1], i2=new_map[edge.i2])
+        for edge in wireframe_3d.edges
+        if edge.i1 in largest and edge.i2 in largest
+    ]
     # remove duplicates
+    new_conns = list(set(new_conns))
+    return Wireframe3D(new_vertices, new_conns)
 def get_sparse_depth(colmap_rec, img_id_substring, depth_shape):
     H, W = depth_shape
     # 1) Find the matching COLMAP image
+    found_img: pycolmap.Image | None = None
     for img_id_c, col_img in colmap_rec.images.items():
         if img_id_substring in col_img.name:
             found_img = col_img
     z_vals = []
     for xyz in points_xyz:
         proj = found_img.project_point(xyz)  # returns (u, v) in image coords or None
+        found_camera = found_img.camera
+        if found_camera is None:
+            print(f"Camera for {found_img.name} is None.")
+            return np.zeros((H, W), dtype=np.float32), False, found_img
+        cur_res = np.array([found_camera.height, found_camera.width])
         exp_res = np.array([H, W])
         proj = proj * exp_res / cur_res
                 # We'll compute depth as Z in camera coords
                 # from the world->cam transform col_img holds
                 mat4x4 = np.eye(4)
+                if found_img.cam_from_world is None:
+                    raise ValueError(f"Camera for {found_img.name} is None.")
                 mat4x4[:3, :4] = found_img.cam_from_world.matrix()
                 p_cam = mat4x4 @ np.array([xyz[0], xyz[1], xyz[2], 1.0])
                 z_vals.append(p_cam[2] / p_cam[3])
     return depth_fitted, depth_sparse, True, col_img
+def prune_too_far(wireframe_3d, colmap_rec, th=3.0):
     """
     Prune vertices that are too far from sparse point cloud
     xyz_sfm = []
     for k, v in colmap_rec.points3D.items():
         xyz_sfm.append(v.xyz)
+    xyz_sfm = np.array(xyz_sfm)  # (M, 3)
+    vertices_np = np.array([v.xyz for v in wireframe_3d.vertices])  # (N, 3)
+    distmat = cdist(vertices_np, xyz_sfm)  # (N, M)
+    mindist = distmat.min(axis=1)  # (N,)
     mask = mindist <= th
+    vertices_new: list[WireframePoint3D] = [
+        v for v, m in zip(wireframe_3d.vertices, mask) if m
+    ]
+    old_idx_survived = np.arange(len(wireframe_3d.vertices))[mask]
+    old_to_new_idx = {old_idx_survived[i]: i for i in range(len(old_idx_survived))}
     connections_3d_new = [
+        WireframeEdge(i1=int(old_to_new_idx[conn.i1]), i2=int(old_to_new_idx[conn.i2]))
+        for conn in wireframe_3d.edges
+        if conn.i1 in old_to_new_idx and conn.i2 in old_to_new_idx
     ]
+    return Wireframe3D(
+        vertices_new,
+        connections_3d_new,
+    )
+def predict_wireframe(entry) -> Wireframe3D:
     """
     Predict 3D wireframe from a dataset entry.
     """
     good_entry = convert_entry_to_human_readable(entry)
+    vert_edge_per_image: dict[int, Wireframe2DWith3D] = {}
     for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(
         zip(
             good_entry["gestalt"],
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
         # Get 2D vertices and edges first
+        wireframe2d = get_vertices_and_edges_from_segmentation_contours(
             gest_seg_np, edge_th=10.0
         )
+        vertices = wireframe2d.vertices
+        connections = wireframe2d.edges
         # Check if we have enough to proceed
         if (len(vertices) < 2) or (len(connections) < 1):
             print(f"Not enough vertices or connections found in image {i}, skipping.")
+            vert_edge_per_image[i] = Wireframe2DWith3D(
+                wireframe2d=wireframe2d, vertices_3d=[]
+            )
             continue
         # Call the refactored function to get 3D points
             vertices, connections, depth, colmap_rec, img_id, ade_seg
         )
         # Store original 2D vertices, connections, and computed 3D points
+        vert_edge_per_image[i] = Wireframe2DWith3D(
+            wireframe2d=wireframe2d, vertices_3d=vertices_3d
+        )
     # Merge vertices from all images
+    wireframe_3d = merge_vertices_3d(vert_edge_per_image, 0.5)
+    # wireframe_3d_clean = prune_not_connected(wireframe_3d, keep_largest=False)
+    wireframe_3d_clean = wireframe_3d
+    wireframe_3d_clean = prune_too_far(wireframe_3d_clean, colmap_rec, th=4.0)
+    if (len(wireframe_3d_clean.vertices) < 2) or len(wireframe_3d_clean.edges) < 1:
         print("Not enough vertices or connections in the 3D vertices")
         return empty_solution()
+    return wireframe_3d_clean

process_sample.py CHANGED Viewed

@@ -2,10 +2,9 @@ import io
 import tempfile
 import zipfile
-import numpy as np
 import pycolmap
-from example_solutions_copy import predict_wireframe
 def read_colmap_rec(colmap_data):
@@ -17,21 +16,16 @@ def read_colmap_rec(colmap_data):
         return rec
-def empty_solution():
-    """Return a minimal valid solution, i.e. 2 vertices and 1 edge."""
-    return np.zeros((2, 3)), [(0, 1)]
 def process_sample(sample, handle_error=True):
     try:
-        pred_vertices, pred_edges = predict_wireframe(sample)
     except Exception:
         if handle_error:
-            pred_vertices, pred_edges = empty_solution()
         else:
             raise
     return {
         "order_id": sample["order_id"],
-        "wf_vertices": pred_vertices.tolist(),
-        "wf_edges": pred_edges,
     }

 import tempfile
 import zipfile
 import pycolmap
+from example_solutions_copy import empty_solution, predict_wireframe
 def read_colmap_rec(colmap_data):
         return rec
 def process_sample(sample, handle_error=True):
     try:
+        pred_wireframe = predict_wireframe(sample)
     except Exception:
         if handle_error:
+            pred_wireframe = empty_solution()
         else:
             raise
     return {
         "order_id": sample["order_id"],
+        "wf_vertices": pred_wireframe.vertices_np,
+        "wf_edges": pred_wireframe.edges_np,
     }