Refactors COLMAP point projection for vertex detection
Simplifies vertex detection by introducing dedicated functions for projecting COLMAP points into 2D image coordinates. This change enhances code readability and maintainability. The original combined function is separated into smaller, modular functions, each responsible for a specific task: projecting visible points, projecting to 2D, and getting apex/eave points.
These helper functions enhance modularity and readability.
- predict.py +87 -167
- train.py +1 -1
predict.py
CHANGED
|
@@ -413,10 +413,10 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
|
|
| 413 |
gest_seg = gest.resize(depth_size)
|
| 414 |
gest_seg_np = np.array(gest_seg).astype(np.uint8)
|
| 415 |
|
| 416 |
-
vertices_ours, connections_ours, vertices_3d_ours = our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id, ade_seg, depth, K=K, R=R, t=t
|
| 417 |
-
|
| 418 |
# Get 2D vertices and edges first
|
| 419 |
-
vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.)
|
| 420 |
|
| 421 |
#gt_verts = []
|
| 422 |
#gt_verts, gt_connects, gt_verts3d = get_gt_vertices_and_edges(good_entry, i, depth, colmap_rec, K, R, t, img_id, ade_seg)
|
|
@@ -444,7 +444,7 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
|
|
| 444 |
continue
|
| 445 |
|
| 446 |
# Call the refactored function to get 3D points
|
| 447 |
-
vertices_3d = create_3d_wireframe_single_image(vertices, connections, depth, colmap_rec, img_id, ade_seg, K, R, t)
|
| 448 |
#vertices_3d = gt_verts3d
|
| 449 |
# Store original 2D vertices, connections, and computed 3D points
|
| 450 |
|
|
@@ -491,28 +491,7 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
|
|
| 491 |
|
| 492 |
return all_3d_vertices_clean, connections_3d_clean
|
| 493 |
|
| 494 |
-
|
| 495 |
-
def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_seg, depth, K=None, R=None, t=None, ):
|
| 496 |
-
"""
|
| 497 |
-
Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
|
| 498 |
-
Also find all COLMAP points that project into apex or eave_end masks.
|
| 499 |
-
"""
|
| 500 |
-
#--------------------------------------------------------------------------------
|
| 501 |
-
# Step A: Collect apex and eave_end vertices
|
| 502 |
-
#--------------------------------------------------------------------------------
|
| 503 |
-
if not isinstance(gest_seg_np, np.ndarray):
|
| 504 |
-
gest_seg_np = np.array(gest_seg_np)
|
| 505 |
-
|
| 506 |
-
# Apex
|
| 507 |
-
apex_color = np.array(gestalt_color_mapping['apex'])
|
| 508 |
-
apex_mask = cv2.inRange(gest_seg_np, apex_color-10., apex_color+10.)
|
| 509 |
-
|
| 510 |
-
# Eave end
|
| 511 |
-
eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
|
| 512 |
-
eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-10, eave_end_color+10)
|
| 513 |
-
|
| 514 |
-
H, W = gest_seg_np.shape[:2]
|
| 515 |
-
|
| 516 |
# 1) Find the matching COLMAP image to get its associated 3D points
|
| 517 |
# This part remains to identify which 3D points are relevant for this image view
|
| 518 |
found_img = None
|
|
@@ -548,6 +527,9 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 548 |
points_cam_h = (world_to_cam_mat @ points_xyz_world_h.T).T # (N, 4)
|
| 549 |
points_cam = points_cam_h[:, :3] / points_cam_h[:, 3, np.newaxis] # (N, 3) in camera coordinates
|
| 550 |
|
|
|
|
|
|
|
|
|
|
| 551 |
uv = []
|
| 552 |
valid_indices = [] # Track which original points are valid
|
| 553 |
|
|
@@ -569,7 +551,12 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 569 |
if 0 <= u_i_int < W and 0 <= v_i_int < H:
|
| 570 |
uv.append((u_i_int, v_i_int))
|
| 571 |
valid_indices.append(i) # Store original index
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
|
|
|
|
| 573 |
uv_colmap = []
|
| 574 |
valid_indices_colmap = []
|
| 575 |
for i, xyz in enumerate(points_xyz_world):
|
|
@@ -583,31 +570,27 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 583 |
uv_colmap.append((u_i, v_i))
|
| 584 |
valid_indices_colmap.append(i) # Store original index
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
|
|
|
|
|
|
|
|
|
| 594 |
|
| 595 |
-
# Filter points that fall within the apex or eave_end masks
|
| 596 |
filtered_points_xyz = []
|
| 597 |
filtered_point_idxs = []
|
| 598 |
filtered_points_color = []
|
| 599 |
filtered_vertices_apex = []
|
| 600 |
filtered_vertices_apex_uv = []
|
| 601 |
-
filtered_vertices_eave_end = []
|
| 602 |
-
filtered_vertices_eave_end_uv = []
|
| 603 |
|
| 604 |
-
# Apex
|
| 605 |
-
apex_color = np.array(gestalt_color_mapping['apex'])
|
| 606 |
-
apex_mask = cv2.inRange(gest_seg_np, apex_color-10., apex_color+10.)
|
| 607 |
if apex_mask.sum() > 0:
|
| 608 |
output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
|
| 609 |
(numLabels, labels, stats, centroids) = output
|
| 610 |
-
stats, centroids = stats[1:], centroids[1:]
|
| 611 |
for i in range(1, numLabels):
|
| 612 |
cur_mask = labels == i
|
| 613 |
# Dilate the current mask to make it slightly larger
|
|
@@ -623,72 +606,7 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 623 |
valid_points_mask = cur_mask[uv[:, 1], uv[:, 0]] & house_mask[uv[:, 1], uv[:, 0]]
|
| 624 |
else:
|
| 625 |
break
|
| 626 |
-
#
|
| 627 |
-
if np.any(valid_points_mask):
|
| 628 |
-
# Get indices of valid points
|
| 629 |
-
valid_point_indices = valid_indices[valid_points_mask]
|
| 630 |
-
|
| 631 |
-
# Get 3D points in camera coordinates for depth filtering
|
| 632 |
-
valid_world_points = points_xyz_world[valid_point_indices]
|
| 633 |
-
valid_cam_points = points_cam[valid_point_indices]
|
| 634 |
-
|
| 635 |
-
# Compute depths (Z coordinates in camera space)
|
| 636 |
-
depths = valid_cam_points[:, 2]
|
| 637 |
-
|
| 638 |
-
# Find minimum depth and filter points within min_depth + 2 meters
|
| 639 |
-
if len(depths) > 0:
|
| 640 |
-
min_depth = np.min(depths)
|
| 641 |
-
depth_filter = depths <= (min_depth + 2.0)
|
| 642 |
-
|
| 643 |
-
# Apply depth filter
|
| 644 |
-
final_valid_indices = valid_point_indices[depth_filter]
|
| 645 |
-
|
| 646 |
-
# Add corresponding points to filtered lists
|
| 647 |
-
filtered_points_xyz.extend(points_xyz_world[final_valid_indices])
|
| 648 |
-
filtered_point_idxs.extend(points_idxs[final_valid_indices])
|
| 649 |
-
filtered_points_color.extend([color] * np.sum(depth_filter))
|
| 650 |
|
| 651 |
-
# Find the point with lowest depth in the filtered points
|
| 652 |
-
if len(final_valid_indices) > 0:
|
| 653 |
-
lowest_depth_idx = np.argmin(depths[depth_filter])
|
| 654 |
-
lowest_depth_point = final_valid_indices[lowest_depth_idx]
|
| 655 |
-
filtered_vertices_apex.append(points_xyz_world[lowest_depth_point])
|
| 656 |
-
filtered_points_xyz.append(points_xyz_world[lowest_depth_point])
|
| 657 |
-
filtered_point_idxs.append(points_idxs[lowest_depth_point])
|
| 658 |
-
filtered_points_color.append(np.array([1., 1., 0.]))
|
| 659 |
-
|
| 660 |
-
# Project the lowest depth point back to image coordinates for visualization
|
| 661 |
-
lowest_cam_point = points_cam[lowest_depth_point]
|
| 662 |
-
|
| 663 |
-
# Project to image plane using K
|
| 664 |
-
u_proj = (K[0, 0] * lowest_cam_point[0] / lowest_cam_point[2]) + K[0, 2]
|
| 665 |
-
v_proj = (K[1, 1] * lowest_cam_point[1] / lowest_cam_point[2]) + K[1, 2]
|
| 666 |
-
|
| 667 |
-
u_proj_int = int(round(u_proj))
|
| 668 |
-
v_proj_int = int(round(v_proj))
|
| 669 |
-
|
| 670 |
-
filtered_vertices_apex_uv.append((u_proj_int, v_proj_int))
|
| 671 |
-
|
| 672 |
-
# Eave end
|
| 673 |
-
eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
|
| 674 |
-
eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-10, eave_end_color+10)
|
| 675 |
-
if eave_end_mask.sum() > 0:
|
| 676 |
-
output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
|
| 677 |
-
(numLabels, labels, stats, centroids) = output
|
| 678 |
-
stats, centroids = stats[1:], centroids[1:]
|
| 679 |
-
for i in range(1, numLabels):
|
| 680 |
-
cur_mask = labels == i
|
| 681 |
-
kernel = np.ones((5,5), np.uint8)
|
| 682 |
-
cur_mask = cv2.dilate(cur_mask.astype(np.uint8), kernel, iterations=2).astype(bool)
|
| 683 |
-
color = np.random.rand(3)
|
| 684 |
-
valid_points_mask = cur_mask[uv[:, 1], uv[:, 0]] & house_mask[uv[:, 1], uv[:, 0]]
|
| 685 |
-
|
| 686 |
-
for z in range(5):
|
| 687 |
-
if np.sum(valid_points_mask) < 5:
|
| 688 |
-
cur_mask = cv2.dilate(cur_mask.astype(np.uint8), kernel, iterations=1).astype(bool)
|
| 689 |
-
valid_points_mask = cur_mask[uv[:, 1], uv[:, 0]] & house_mask[uv[:, 1], uv[:, 0]]
|
| 690 |
-
else:
|
| 691 |
-
break
|
| 692 |
if np.any(valid_points_mask):
|
| 693 |
# Get indices of valid points
|
| 694 |
valid_point_indices = valid_indices[valid_points_mask]
|
|
@@ -717,38 +635,35 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 717 |
if len(final_valid_indices) > 0:
|
| 718 |
lowest_depth_idx = np.argmin(depths[depth_filter])
|
| 719 |
lowest_depth_point = final_valid_indices[lowest_depth_idx]
|
| 720 |
-
|
|
|
|
| 721 |
filtered_points_xyz.append(points_xyz_world[lowest_depth_point])
|
| 722 |
filtered_point_idxs.append(points_idxs[lowest_depth_point])
|
| 723 |
filtered_points_color.append(np.array([1., 1., 0.]))
|
|
|
|
| 724 |
|
| 725 |
-
|
| 726 |
-
lowest_cam_point = points_cam[lowest_depth_point]
|
| 727 |
-
|
| 728 |
-
# Project to image plane using K
|
| 729 |
-
u_proj = (K[0, 0] * lowest_cam_point[0] / lowest_cam_point[2]) + K[0, 2]
|
| 730 |
-
v_proj = (K[1, 1] * lowest_cam_point[1] / lowest_cam_point[2]) + K[1, 2]
|
| 731 |
|
| 732 |
-
|
| 733 |
-
|
|
|
|
| 734 |
|
| 735 |
-
|
|
|
|
|
|
|
|
|
|
| 736 |
|
| 737 |
-
'''
|
| 738 |
-
for i, (u, v) in enumerate(uv):
|
| 739 |
-
# Check if this projected point falls within the combined maskvalid_indices
|
| 740 |
-
if combined_mask[v, u] > 0 and house_mask[v, u] > 0:
|
| 741 |
-
original_idx = valid_indices[i] # Get original index
|
| 742 |
-
filtered_points_xyz.append(points_xyz_world[original_idx])
|
| 743 |
-
filtered_point_idxs.append(points_idxs[original_idx])
|
| 744 |
-
'''
|
| 745 |
filtered_points_xyz = np.array(filtered_points_xyz[::-1]) if filtered_points_xyz else np.empty((0, 3))
|
| 746 |
filtered_point_idxs = np.array(filtered_point_idxs[::-1]) if filtered_point_idxs else np.empty((0,))
|
| 747 |
filtered_points_color = np.array(filtered_points_color[::-1]) if filtered_points_color else np.empty((0, 3))
|
| 748 |
filtered_vertices_apex = np.array(filtered_vertices_apex) if filtered_vertices_apex else np.empty((0, 3))
|
| 749 |
-
|
|
|
|
|
|
|
| 750 |
|
|
|
|
| 751 |
|
|
|
|
| 752 |
connections = []
|
| 753 |
edge_classes = ['eave', 'ridge', 'rake', 'valley']
|
| 754 |
edge_th = 25.0 # threshold for proximity to line segments
|
|
@@ -765,7 +680,7 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 765 |
vertex_types.append('apex')
|
| 766 |
|
| 767 |
# Add eave_end vertices
|
| 768 |
-
for i, (vertex_3d, vertex_uv) in enumerate(zip(
|
| 769 |
all_vertices_3d.append(vertex_3d)
|
| 770 |
all_vertices_uv.append(vertex_uv)
|
| 771 |
vertex_types.append('eave_end')
|
|
@@ -840,42 +755,17 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 840 |
if conn not in connections:
|
| 841 |
connections.append(conn)
|
| 842 |
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
# Also apply house mask for additional filtering
|
| 854 |
-
house_coords = np.where(house_mask > 0)
|
| 855 |
-
house_v, house_u = house_coords
|
| 856 |
-
|
| 857 |
-
# Find intersection of combined_mask and house_mask
|
| 858 |
-
valid_mask = np.logical_and(combined_mask > 0, house_mask > 0)
|
| 859 |
-
valid_coords = np.where(valid_mask)
|
| 860 |
-
v_valid, u_valid = valid_coords
|
| 861 |
|
| 862 |
-
|
| 863 |
-
# Get depth values at these coordinates
|
| 864 |
-
depth_values = depth_fitted[v_valid, u_valid]
|
| 865 |
-
|
| 866 |
-
# Filter out zero or invalid depth values
|
| 867 |
-
valid_depth_mask = depth_values > 0
|
| 868 |
-
if np.any(valid_depth_mask):
|
| 869 |
-
u_final = u_valid[valid_depth_mask]
|
| 870 |
-
v_final = v_valid[valid_depth_mask]
|
| 871 |
-
depth_final = depth_values[valid_depth_mask]
|
| 872 |
-
|
| 873 |
-
# Create UV coordinates for backprojection
|
| 874 |
-
uv_depth = np.column_stack((u_final, v_final))
|
| 875 |
-
|
| 876 |
-
# Backproject to 3D world coordinates
|
| 877 |
-
segmented_points_3d = project_vertices_to_3d(uv_depth, depth_final, col_img, K, R, t)
|
| 878 |
-
'''
|
| 879 |
segmented_points_3d = []
|
| 880 |
|
| 881 |
# Visualize with the segmented depth points in blue
|
|
@@ -912,12 +802,42 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
|
|
| 912 |
geometries.append(pcd_depth)
|
| 913 |
|
| 914 |
#o3d.visualization.draw_geometries(geometries, window_name=f"Combined Point Cloud - {img_id_substring}")
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
|
| 923 |
return vertices_formatted, connections, all_vertices_3d
|
|
|
|
| 413 |
gest_seg = gest.resize(depth_size)
|
| 414 |
gest_seg_np = np.array(gest_seg).astype(np.uint8)
|
| 415 |
|
| 416 |
+
vertices_ours, connections_ours, vertices_3d_ours = our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id, ade_seg, depth, K=K, R=R, t=t)
|
| 417 |
+
vertices, connections, vertices_3d = vertices_ours, connections_ours, vertices_3d_ours
|
| 418 |
# Get 2D vertices and edges first
|
| 419 |
+
#vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.)
|
| 420 |
|
| 421 |
#gt_verts = []
|
| 422 |
#gt_verts, gt_connects, gt_verts3d = get_gt_vertices_and_edges(good_entry, i, depth, colmap_rec, K, R, t, img_id, ade_seg)
|
|
|
|
| 444 |
continue
|
| 445 |
|
| 446 |
# Call the refactored function to get 3D points
|
| 447 |
+
#vertices_3d = create_3d_wireframe_single_image(vertices, connections, depth, colmap_rec, img_id, ade_seg, K, R, t)
|
| 448 |
#vertices_3d = gt_verts3d
|
| 449 |
# Store original 2D vertices, connections, and computed 3D points
|
| 450 |
|
|
|
|
| 491 |
|
| 492 |
return all_3d_vertices_clean, connections_3d_clean
|
| 493 |
|
| 494 |
+
def get_visible_points(colmap_rec, img_id_substring, R=None, t=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
# 1) Find the matching COLMAP image to get its associated 3D points
|
| 496 |
# This part remains to identify which 3D points are relevant for this image view
|
| 497 |
found_img = None
|
|
|
|
| 527 |
points_cam_h = (world_to_cam_mat @ points_xyz_world_h.T).T # (N, 4)
|
| 528 |
points_cam = points_cam_h[:, :3] / points_cam_h[:, 3, np.newaxis] # (N, 3) in camera coordinates
|
| 529 |
|
| 530 |
+
return points_cam, points_xyz_world, points_idxs
|
| 531 |
+
|
| 532 |
+
def project_points_to_2d(points_cam, K, H, W):
|
| 533 |
uv = []
|
| 534 |
valid_indices = [] # Track which original points are valid
|
| 535 |
|
|
|
|
| 551 |
if 0 <= u_i_int < W and 0 <= v_i_int < H:
|
| 552 |
uv.append((u_i_int, v_i_int))
|
| 553 |
valid_indices.append(i) # Store original index
|
| 554 |
+
|
| 555 |
+
uv = np.array(uv, dtype=int) # shape (M,2)
|
| 556 |
+
valid_indices = np.array(valid_indices) # shape (M,)
|
| 557 |
+
return uv, valid_indices
|
| 558 |
|
| 559 |
+
def project_points_to_2d_colmap(points_xyz_world, found_img, H, W):
|
| 560 |
uv_colmap = []
|
| 561 |
valid_indices_colmap = []
|
| 562 |
for i, xyz in enumerate(points_xyz_world):
|
|
|
|
| 570 |
uv_colmap.append((u_i, v_i))
|
| 571 |
valid_indices_colmap.append(i) # Store original index
|
| 572 |
|
| 573 |
+
uv_colmap = np.array(uv_colmap, dtype=int)
|
| 574 |
+
valid_indices_colmap = np.array(valid_indices_colmap)
|
| 575 |
+
return uv_colmap, valid_indices_colmap
|
| 576 |
|
| 577 |
+
def get_apex_or_eave_points(apex, uv, gest_seg_np, house_mask, valid_indices, points_xyz_world, points_cam, points_idxs):
|
| 578 |
+
# Apex
|
| 579 |
+
if apex:
|
| 580 |
+
apex_color = np.array(gestalt_color_mapping['apex'])
|
| 581 |
+
else:
|
| 582 |
+
apex_color = np.array(gestalt_color_mapping['eave_end_point'])
|
| 583 |
+
apex_mask = cv2.inRange(gest_seg_np, apex_color-10., apex_color+10.)
|
| 584 |
|
|
|
|
| 585 |
filtered_points_xyz = []
|
| 586 |
filtered_point_idxs = []
|
| 587 |
filtered_points_color = []
|
| 588 |
filtered_vertices_apex = []
|
| 589 |
filtered_vertices_apex_uv = []
|
|
|
|
|
|
|
| 590 |
|
|
|
|
|
|
|
|
|
|
| 591 |
if apex_mask.sum() > 0:
|
| 592 |
output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
|
| 593 |
(numLabels, labels, stats, centroids) = output
|
|
|
|
| 594 |
for i in range(1, numLabels):
|
| 595 |
cur_mask = labels == i
|
| 596 |
# Dilate the current mask to make it slightly larger
|
|
|
|
| 606 |
valid_points_mask = cur_mask[uv[:, 1], uv[:, 0]] & house_mask[uv[:, 1], uv[:, 0]]
|
| 607 |
else:
|
| 608 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
if np.any(valid_points_mask):
|
| 611 |
# Get indices of valid points
|
| 612 |
valid_point_indices = valid_indices[valid_points_mask]
|
|
|
|
| 635 |
if len(final_valid_indices) > 0:
|
| 636 |
lowest_depth_idx = np.argmin(depths[depth_filter])
|
| 637 |
lowest_depth_point = final_valid_indices[lowest_depth_idx]
|
| 638 |
+
|
| 639 |
+
filtered_vertices_apex.append(points_xyz_world[lowest_depth_point])
|
| 640 |
filtered_points_xyz.append(points_xyz_world[lowest_depth_point])
|
| 641 |
filtered_point_idxs.append(points_idxs[lowest_depth_point])
|
| 642 |
filtered_points_color.append(np.array([1., 1., 0.]))
|
| 643 |
+
filtered_vertices_apex_uv.append(centroids[i])
|
| 644 |
|
| 645 |
+
return filtered_points_xyz, filtered_point_idxs, filtered_points_color, filtered_vertices_apex, filtered_vertices_apex_uv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
|
| 647 |
+
def get_vertexes(uv, gest_seg_np, house_mask, valid_indices, points_xyz_world, points_cam, points_idxs):
|
| 648 |
+
filtered_points_xyz_apex, filtered_point_idxs_apex, filtered_points_color_apex, filtered_vertices_apex, filtered_vertices_apex_uv = get_apex_or_eave_points(True, uv, gest_seg_np, house_mask, valid_indices, points_xyz_world, points_cam, points_idxs)
|
| 649 |
+
filtered_points_xyz_eave, filtered_point_idxs_eave, filtered_points_color_eave, filtered_vertices_eave, filtered_vertices_eave_uv = get_apex_or_eave_points(False, uv, gest_seg_np, house_mask, valid_indices, points_xyz_world, points_cam, points_idxs)
|
| 650 |
|
| 651 |
+
# Combine filtered points from both apex and eave_end
|
| 652 |
+
filtered_points_xyz = filtered_points_xyz_apex + filtered_points_xyz_eave
|
| 653 |
+
filtered_point_idxs = filtered_point_idxs_apex + filtered_point_idxs_eave
|
| 654 |
+
filtered_points_color = filtered_points_color_apex + filtered_points_color_eave
|
| 655 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
filtered_points_xyz = np.array(filtered_points_xyz[::-1]) if filtered_points_xyz else np.empty((0, 3))
|
| 657 |
filtered_point_idxs = np.array(filtered_point_idxs[::-1]) if filtered_point_idxs else np.empty((0,))
|
| 658 |
filtered_points_color = np.array(filtered_points_color[::-1]) if filtered_points_color else np.empty((0, 3))
|
| 659 |
filtered_vertices_apex = np.array(filtered_vertices_apex) if filtered_vertices_apex else np.empty((0, 3))
|
| 660 |
+
filtered_vertices_apex_uv = np.array(filtered_vertices_apex_uv) if filtered_vertices_apex_uv else np.empty((0, 2))
|
| 661 |
+
filtered_vertices_eave = np.array(filtered_vertices_eave) if filtered_vertices_eave else np.empty((0, 3))
|
| 662 |
+
filtered_vertices_eave_uv = np.array(filtered_vertices_eave_uv) if filtered_vertices_eave_uv else np.empty((0, 2))
|
| 663 |
|
| 664 |
+
return filtered_points_xyz, filtered_point_idxs, filtered_points_color, filtered_vertices_apex, filtered_vertices_apex_uv, filtered_vertices_eave, filtered_vertices_eave_uv
|
| 665 |
|
| 666 |
+
def get_connections(gest_seg_np, filtered_vertices_apex, filtered_vertices_eave, filtered_vertices_apex_uv, filtered_vertices_eave_uv):
|
| 667 |
connections = []
|
| 668 |
edge_classes = ['eave', 'ridge', 'rake', 'valley']
|
| 669 |
edge_th = 25.0 # threshold for proximity to line segments
|
|
|
|
| 680 |
vertex_types.append('apex')
|
| 681 |
|
| 682 |
# Add eave_end vertices
|
| 683 |
+
for i, (vertex_3d, vertex_uv) in enumerate(zip(filtered_vertices_eave, filtered_vertices_eave_uv)):
|
| 684 |
all_vertices_3d.append(vertex_3d)
|
| 685 |
all_vertices_uv.append(vertex_uv)
|
| 686 |
vertex_types.append('eave_end')
|
|
|
|
| 755 |
if conn not in connections:
|
| 756 |
connections.append(conn)
|
| 757 |
|
| 758 |
+
# Convert all_vertices_uv and vertex_types to the required format
|
| 759 |
+
vertices_formatted = []
|
| 760 |
+
for uv, vertex_type in zip(all_vertices_uv, vertex_types):
|
| 761 |
+
vertices_formatted.append({
|
| 762 |
+
'xy': np.array(uv, dtype=float),
|
| 763 |
+
'type': vertex_type
|
| 764 |
+
})
|
| 765 |
+
|
| 766 |
+
return vertices_formatted, connections, all_vertices_3d
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
|
| 768 |
+
def visualize_3d_wireframe(colmap_rec, filtered_points_xyz, filtered_points_color, vertices_3d, connections):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 769 |
segmented_points_3d = []
|
| 770 |
|
| 771 |
# Visualize with the segmented depth points in blue
|
|
|
|
| 802 |
geometries.append(pcd_depth)
|
| 803 |
|
| 804 |
#o3d.visualization.draw_geometries(geometries, window_name=f"Combined Point Cloud - {img_id_substring}")
|
| 805 |
+
|
| 806 |
+
def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_seg, depth, K=None, R=None, t=None, ):
|
| 807 |
+
"""
|
| 808 |
+
Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
|
| 809 |
+
Also find all COLMAP points that project into apex or eave_end masks.
|
| 810 |
+
"""
|
| 811 |
+
#--------------------------------------------------------------------------------
|
| 812 |
+
# Step A: Collect apex and eave_end vertices
|
| 813 |
+
#--------------------------------------------------------------------------------
|
| 814 |
+
if not isinstance(gest_seg_np, np.ndarray):
|
| 815 |
+
gest_seg_np = np.array(gest_seg_np)
|
| 816 |
+
|
| 817 |
+
# Apex
|
| 818 |
+
apex_color = np.array(gestalt_color_mapping['apex'])
|
| 819 |
+
apex_mask = cv2.inRange(gest_seg_np, apex_color-10., apex_color+10.)
|
| 820 |
+
|
| 821 |
+
# Eave end
|
| 822 |
+
eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
|
| 823 |
+
eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-10, eave_end_color+10)
|
| 824 |
+
|
| 825 |
+
H, W = gest_seg_np.shape[:2]
|
| 826 |
+
|
| 827 |
+
points_cam, points_xyz_world, points_idxs = get_visible_points(colmap_rec, img_id_substring, R=R, t=t)
|
| 828 |
+
|
| 829 |
+
uv, valid_indices = project_points_to_2d(points_cam, K, H, W)
|
| 830 |
+
|
| 831 |
+
if len(uv) == 0:
|
| 832 |
+
print(f"No points projected into image bounds for {img_id_substring} using K,R,t.")
|
| 833 |
+
return [], [], []
|
| 834 |
+
|
| 835 |
+
house_mask = get_house_mask(ade_seg)
|
| 836 |
+
|
| 837 |
+
filtered_points_xyz, filtered_point_idxs, filtered_points_color, filtered_vertices_apex, filtered_vertices_apex_uv, filtered_vertices_eave, filtered_vertices_eave_uv = get_vertexes(uv, gest_seg_np, house_mask, valid_indices, points_xyz_world, points_cam, points_idxs)
|
| 838 |
+
|
| 839 |
+
vertices_formatted, connections, all_vertices_3d = get_connections(gest_seg_np, filtered_vertices_apex, filtered_vertices_eave, filtered_vertices_apex_uv, filtered_vertices_eave_uv)
|
| 840 |
+
|
| 841 |
+
#visualize_3d_wireframe(colmap_rec, filtered_points_xyz, filtered_points_color, all_vertices_3d, connections)
|
| 842 |
|
| 843 |
return vertices_formatted, connections, all_vertices_3d
|
train.py
CHANGED
|
@@ -14,7 +14,7 @@ from hoho2025.metric_helper import hss
|
|
| 14 |
from predict import predict_wireframe
|
| 15 |
|
| 16 |
ds = load_dataset("usm3d/hoho25k", streaming=True, trust_remote_code=True)
|
| 17 |
-
|
| 18 |
scores_hss = []
|
| 19 |
scores_f1 = []
|
| 20 |
scores_iou = []
|
|
|
|
| 14 |
from predict import predict_wireframe
|
| 15 |
|
| 16 |
ds = load_dataset("usm3d/hoho25k", streaming=True, trust_remote_code=True)
|
| 17 |
+
|
| 18 |
scores_hss = []
|
| 19 |
scores_f1 = []
|
| 20 |
scores_iou = []
|