jskvrna commited on
Commit
c4db724
·
1 Parent(s): d9fc230

Implements point cloud generation from COLMAP data

Browse files

Adds functionality to create a point cloud from COLMAP reconstruction data, incorporating image information such as camera parameters, ADE segmentation, and gestalt segmentation.

This allows for more detailed analysis and visualization of the reconstructed scene, including semantic information. It also lowers the edge threshold from 0.7 to 0.6.

Files changed (2) hide show
  1. predict.py +118 -8
  2. train.py +2 -2
predict.py CHANGED
@@ -1203,6 +1203,122 @@ def calculate_cylinder_overlap_volume(cyl1, cyl2):
1203
 
1204
  return max(0.0, overlap_volume)
1205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1206
  def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config) -> Tuple[np.ndarray, List[int]]:
1207
  """
1208
  Predict 3D wireframe from a dataset entry.
@@ -1210,6 +1326,8 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
1210
  good_entry = convert_entry_to_human_readable(entry)
1211
  colmap_rec = good_entry['colmap_binary']
1212
 
 
 
1213
  vertex_threshold = config.get('vertex_threshold', 0.5)
1214
  edge_threshold = config.get('edge_threshold', 0.5)
1215
  only_predicted_connections = config.get('only_predicted_connections', False)
@@ -2043,14 +2161,6 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
2043
  if not isinstance(gest_seg_np, np.ndarray):
2044
  gest_seg_np = np.array(gest_seg_np)
2045
 
2046
- # Apex
2047
- apex_color = np.array(gestalt_color_mapping['apex'])
2048
- apex_mask = cv2.inRange(gest_seg_np, apex_color-10., apex_color+10.)
2049
-
2050
- # Eave end
2051
- eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
2052
- eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-10, eave_end_color+10)
2053
-
2054
  H, W = gest_seg_np.shape[:2]
2055
 
2056
  # Get camera parameters from COLMAP reconstruction if not provided
 
1203
 
1204
  return max(0.0, overlap_volume)
1205
 
1206
def create_pcloud(colmap_rec, frame):
    """Build a semantically annotated point cloud from a COLMAP reconstruction.

    For every image registered in the reconstruction, camera parameters and
    segmentations are looked up in *frame* by image name.  Every 3D point that
    is observed by at least one such image is projected into its observing
    images to sample the ADE house mask and the gestalt segmentation there.

    Args:
        colmap_rec: COLMAP reconstruction exposing ``images`` (id -> image with
            ``name`` and ``has_point3D``) and ``points3D`` (id -> point with
            ``xyz`` and ``color``) — presumably a pycolmap Reconstruction.
        frame: dict with parallel sequences under the keys ``'K'``, ``'R'``,
            ``'t'``, ``'image_ids'``, ``'ade'``, ``'gestalt'`` and ``'depth'``.

    Returns:
        dict with keys ``'points_7d'`` (N x 7 array: xyz, rgb in [0, 1], point
        id), ``'ids'``, ``'imgs'`` (per point, names of observing images),
        ``'uv'`` (per point, pixel projections), ``'ade'`` (per point, house
        mask flag from the last in-bounds projection), ``'gestalt'`` (per
        point, sampled gestalt colors), and the per-image ``'all_imgs_ids'``,
        ``'all_imgs_K'``, ``'all_imgs_R'``, ``'all_imgs_t'``.  The same key
        set is returned for an empty cloud.
    """
    all_imgs_ids = []
    all_imgs = []
    for _img_id, col_img_obj in colmap_rec.images.items():
        all_imgs_ids.append(col_img_obj.name)
        all_imgs.append(col_img_obj)

    # Camera parameters and segmentations keyed by image NAME.  The previous
    # version appended these in `frame` order but indexed them with the
    # COLMAP-order image index, silently pairing points with the wrong
    # camera whenever the two orders differed.
    cam_params = {}
    all_imgs_K, all_imgs_R, all_imgs_t = [], [], []
    all_imgs_ade, all_imgs_gestalt = [], []
    for K, R, t, img_id, ade, gestalt, depth in zip(
            frame['K'], frame['R'], frame['t'], frame['image_ids'],
            frame['ade'], frame['gestalt'], frame['depth']):
        if img_id not in all_imgs_ids:
            continue
        K = np.array(K)
        R = np.array(R)
        t = np.array(t)
        ade_mask = np.array(get_house_mask(ade))

        # Resize the gestalt segmentation to the depth map's (W, H) so that
        # pixel lookups below use one consistent resolution.
        depth_np = np.array(depth)
        depth_size = (depth_np.shape[1], depth_np.shape[0])  # (W, H)
        gest_seg_np = np.array(gestalt.resize(depth_size)).astype(np.uint8)

        all_imgs_K.append(K)
        all_imgs_R.append(R)
        all_imgs_t.append(t)
        all_imgs_ade.append(ade_mask)
        all_imgs_gestalt.append(gest_seg_np)
        cam_params[img_id] = (K, R, t, ade_mask, gest_seg_np)

    # Gather the 3D points each image sees (according to COLMAP) and sample
    # the segmentations at their projections.
    points_xyz_world = []
    points_colors = []
    points_idxs = []
    points_imgs = []
    points_uv = []
    points_ade = []
    points_gestalt = []

    for pid, p3D in colmap_rec.points3D.items():
        found_in_ids = []
        uv_projections = []
        in_ade = False
        gest = []

        for img in all_imgs:
            if not img.has_point3D(pid):
                continue
            params = cam_params.get(img.name)
            if params is None:
                # Registered in COLMAP but absent from `frame`: no camera
                # data to project with, so this observation is skipped.
                continue
            found_in_ids.append(img.name)
            K, R, t, ade_mask, gest_seg_np = params

            # Project world point into the image: x_cam = [R|t] X, px = K x_cam.
            xyz_homogeneous = np.append(p3D.xyz, 1.0)
            cam_coords = np.hstack([R, t.reshape(3, 1)]) @ xyz_homogeneous
            if cam_coords[2] <= 0:
                continue  # point is behind the camera
            pixel_coords = K @ cam_coords
            u = round(pixel_coords[0] / pixel_coords[2])
            v = round(pixel_coords[1] / pixel_coords[2])
            uv_projections.append((u, v))

            # House-mask membership; False when the projection is out of
            # bounds.  NOTE(review): only the LAST observing image's value
            # survives — confirm this is intended before relying on it.
            if 0 <= u < ade_mask.shape[1] and 0 <= v < ade_mask.shape[0]:
                in_ade = ade_mask[v, u]
            else:
                in_ade = False

            # Gestalt color at the projection; black for out-of-bounds.
            if 0 <= u < gest_seg_np.shape[1] and 0 <= v < gest_seg_np.shape[0]:
                gest.append(gest_seg_np[v, u])
            else:
                gest.append(np.array([0, 0, 0]))

        if found_in_ids:
            points_xyz_world.append(p3D.xyz)            # world coords
            points_colors.append(p3D.color / 255.0)     # normalize to [0, 1]
            points_idxs.append(pid)
            points_imgs.append(found_in_ids)
            points_uv.append(uv_projections)
            points_ade.append(in_ade)
            points_gestalt.append(gest)

    points_xyz_world = np.array(points_xyz_world) if points_xyz_world else np.empty((0, 3))
    points_colors = np.array(points_colors) if points_colors else np.empty((0, 3))
    points_idxs = np.array(points_idxs) if points_idxs else np.empty((0,))
    points_ade = np.array(points_ade) if points_ade else np.empty((0,))

    # 7D point cloud: xyz + rgb + point id.
    colmap_points_7d = np.zeros((points_xyz_world.shape[0], 7))
    if points_xyz_world.shape[0] > 0:
        colmap_points_7d[:, :3] = points_xyz_world
        colmap_points_7d[:, 3:6] = points_colors
        colmap_points_7d[:, 6] = points_idxs

    # One consistent schema for both the empty and the non-empty case; the
    # previous empty branch returned a different key set ('ids' only, no
    # camera/segmentation keys), which broke callers on empty clouds.
    return {'points_7d': colmap_points_7d,
            'ids': points_idxs,
            'imgs': points_imgs,
            'uv': points_uv,
            'all_imgs_ids': all_imgs_ids,
            'all_imgs_K': all_imgs_K,
            'all_imgs_R': all_imgs_R,
            'all_imgs_t': all_imgs_t,
            'ade': points_ade,
            'gestalt': points_gestalt}
1322
  def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config) -> Tuple[np.ndarray, List[int]]:
1323
  """
1324
  Predict 3D wireframe from a dataset entry.
 
1326
  good_entry = convert_entry_to_human_readable(entry)
1327
  colmap_rec = good_entry['colmap_binary']
1328
 
1329
+ colmap_pcloud = create_pcloud(colmap_rec, good_entry)
1330
+
1331
  vertex_threshold = config.get('vertex_threshold', 0.5)
1332
  edge_threshold = config.get('edge_threshold', 0.5)
1333
  only_predicted_connections = config.get('only_predicted_connections', False)
 
2161
  if not isinstance(gest_seg_np, np.ndarray):
2162
  gest_seg_np = np.array(gest_seg_np)
2163
 
 
 
 
 
 
 
 
 
2164
  H, W = gest_seg_np.shape[:2]
2165
 
2166
  # Get camera parameters from COLMAP reconstruction if not provided
train.py CHANGED
@@ -40,13 +40,13 @@ pnet_class_model = load_pointnet_class_model(model_path="/home/skvrnjan/personal
40
  #voxel_model = load_3dcnn_model(model_path="/home/skvrnjan/personal/hoho_voxel/initial_epoch_100.pth", device=device, predict_score=True)
41
  voxel_model = None
42
 
43
- config = {'vertex_threshold': 0.4, 'edge_threshold': 0.7, 'only_predicted_connections': False}
44
 
45
  idx = 0
46
  for a in tqdm(ds['train'], desc="Processing dataset"):
47
  #plot_all_modalities(a)
48
  #pred_vertices, pred_edges = predict_wireframe_old(a)
49
- #pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model)
50
  try:
51
  pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
52
  #pred_vertices, pred_edges = predict_wireframe_old(a)
 
40
  #voxel_model = load_3dcnn_model(model_path="/home/skvrnjan/personal/hoho_voxel/initial_epoch_100.pth", device=device, predict_score=True)
41
  voxel_model = None
42
 
43
+ config = {'vertex_threshold': 0.4, 'edge_threshold': 0.6, 'only_predicted_connections': False}
44
 
45
  idx = 0
46
  for a in tqdm(ds['train'], desc="Processing dataset"):
47
  #plot_all_modalities(a)
48
  #pred_vertices, pred_edges = predict_wireframe_old(a)
49
+ pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
50
  try:
51
  pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
52
  #pred_vertices, pred_edges = predict_wireframe_old(a)