jskvrna commited on
Commit
c4db724
·
1 Parent(s): d9fc230

Implements point cloud generation from COLMAP data

Browse files

Adds functionality to create a point cloud from COLMAP reconstruction data, incorporating image information such as camera parameters, ADE segmentation, and gestalt segmentation.

This allows for more detailed analysis and visualization of the reconstructed scene, including semantic information. It also lowers the edge threshold from 0.7 to 0.6.

Files changed (2) hide show
  1. predict.py +118 -8
  2. train.py +2 -2
predict.py CHANGED
@@ -1203,6 +1203,122 @@ def calculate_cylinder_overlap_volume(cyl1, cyl2):
1203
 
1204
  return max(0.0, overlap_volume)
1205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1206
  def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config) -> Tuple[np.ndarray, List[int]]:
1207
  """
1208
  Predict 3D wireframe from a dataset entry.
@@ -1210,6 +1326,8 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
1210
  good_entry = convert_entry_to_human_readable(entry)
1211
  colmap_rec = good_entry['colmap_binary']
1212
 
 
 
1213
  vertex_threshold = config.get('vertex_threshold', 0.5)
1214
  edge_threshold = config.get('edge_threshold', 0.5)
1215
  only_predicted_connections = config.get('only_predicted_connections', False)
@@ -2043,14 +2161,6 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
2043
  if not isinstance(gest_seg_np, np.ndarray):
2044
  gest_seg_np = np.array(gest_seg_np)
2045
 
2046
- # Apex
2047
- apex_color = np.array(gestalt_color_mapping['apex'])
2048
- apex_mask = cv2.inRange(gest_seg_np, apex_color-10., apex_color+10.)
2049
-
2050
- # Eave end
2051
- eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
2052
- eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-10, eave_end_color+10)
2053
-
2054
  H, W = gest_seg_np.shape[:2]
2055
 
2056
  # Get camera parameters from COLMAP reconstruction if not provided
 
1203
 
1204
  return max(0.0, overlap_volume)
1205
 
1206
def create_pcloud(colmap_rec, frame):
    """Build a semantically annotated point cloud from a COLMAP reconstruction.

    For every image registered in the reconstruction, camera parameters and
    segmentations are looked up in *frame* by image name.  Every 3D point that
    is observed by at least one such image is projected into its observing
    images to sample the ADE house mask and the gestalt segmentation there.

    Args:
        colmap_rec: COLMAP reconstruction exposing ``images`` (id -> image with
            ``name`` and ``has_point3D``) and ``points3D`` (id -> point with
            ``xyz`` and ``color``) — presumably a pycolmap Reconstruction.
        frame: dict with parallel sequences under the keys ``'K'``, ``'R'``,
            ``'t'``, ``'image_ids'``, ``'ade'``, ``'gestalt'`` and ``'depth'``.

    Returns:
        dict with keys ``'points_7d'`` (N x 7 array: xyz, rgb in [0, 1], point
        id), ``'ids'``, ``'imgs'`` (per point, names of observing images),
        ``'uv'`` (per point, pixel projections), ``'ade'`` (per point, house
        mask flag from the last in-bounds projection), ``'gestalt'`` (per
        point, sampled gestalt colors), and the per-image ``'all_imgs_ids'``,
        ``'all_imgs_K'``, ``'all_imgs_R'``, ``'all_imgs_t'``.  The same key
        set is returned for an empty cloud.
    """
    all_imgs_ids = []
    all_imgs = []
    for _img_id, col_img_obj in colmap_rec.images.items():
        all_imgs_ids.append(col_img_obj.name)
        all_imgs.append(col_img_obj)

    # Camera parameters and segmentations keyed by image NAME.  The previous
    # version appended these in `frame` order but indexed them with the
    # COLMAP-order image index, silently pairing points with the wrong
    # camera whenever the two orders differed.
    cam_params = {}
    all_imgs_K, all_imgs_R, all_imgs_t = [], [], []
    all_imgs_ade, all_imgs_gestalt = [], []
    for K, R, t, img_id, ade, gestalt, depth in zip(
            frame['K'], frame['R'], frame['t'], frame['image_ids'],
            frame['ade'], frame['gestalt'], frame['depth']):
        if img_id not in all_imgs_ids:
            continue
        K = np.array(K)
        R = np.array(R)
        t = np.array(t)
        ade_mask = np.array(get_house_mask(ade))

        # Resize the gestalt segmentation to the depth map's (W, H) so that
        # pixel lookups below use one consistent resolution.
        depth_np = np.array(depth)
        depth_size = (depth_np.shape[1], depth_np.shape[0])  # (W, H)
        gest_seg_np = np.array(gestalt.resize(depth_size)).astype(np.uint8)

        all_imgs_K.append(K)
        all_imgs_R.append(R)
        all_imgs_t.append(t)
        all_imgs_ade.append(ade_mask)
        all_imgs_gestalt.append(gest_seg_np)
        cam_params[img_id] = (K, R, t, ade_mask, gest_seg_np)

    # Gather the 3D points each image sees (according to COLMAP) and sample
    # the segmentations at their projections.
    points_xyz_world = []
    points_colors = []
    points_idxs = []
    points_imgs = []
    points_uv = []
    points_ade = []
    points_gestalt = []

    for pid, p3D in colmap_rec.points3D.items():
        found_in_ids = []
        uv_projections = []
        in_ade = False
        gest = []

        for img in all_imgs:
            if not img.has_point3D(pid):
                continue
            params = cam_params.get(img.name)
            if params is None:
                # Registered in COLMAP but absent from `frame`: no camera
                # data to project with, so this observation is skipped.
                continue
            found_in_ids.append(img.name)
            K, R, t, ade_mask, gest_seg_np = params

            # Project world point into the image: x_cam = [R|t] X, px = K x_cam.
            xyz_homogeneous = np.append(p3D.xyz, 1.0)
            cam_coords = np.hstack([R, t.reshape(3, 1)]) @ xyz_homogeneous
            if cam_coords[2] <= 0:
                continue  # point is behind the camera
            pixel_coords = K @ cam_coords
            u = round(pixel_coords[0] / pixel_coords[2])
            v = round(pixel_coords[1] / pixel_coords[2])
            uv_projections.append((u, v))

            # House-mask membership; False when the projection is out of
            # bounds.  NOTE(review): only the LAST observing image's value
            # survives — confirm this is intended before relying on it.
            if 0 <= u < ade_mask.shape[1] and 0 <= v < ade_mask.shape[0]:
                in_ade = ade_mask[v, u]
            else:
                in_ade = False

            # Gestalt color at the projection; black for out-of-bounds.
            if 0 <= u < gest_seg_np.shape[1] and 0 <= v < gest_seg_np.shape[0]:
                gest.append(gest_seg_np[v, u])
            else:
                gest.append(np.array([0, 0, 0]))

        if found_in_ids:
            points_xyz_world.append(p3D.xyz)            # world coords
            points_colors.append(p3D.color / 255.0)     # normalize to [0, 1]
            points_idxs.append(pid)
            points_imgs.append(found_in_ids)
            points_uv.append(uv_projections)
            points_ade.append(in_ade)
            points_gestalt.append(gest)

    points_xyz_world = np.array(points_xyz_world) if points_xyz_world else np.empty((0, 3))
    points_colors = np.array(points_colors) if points_colors else np.empty((0, 3))
    points_idxs = np.array(points_idxs) if points_idxs else np.empty((0,))
    points_ade = np.array(points_ade) if points_ade else np.empty((0,))

    # 7D point cloud: xyz + rgb + point id.
    colmap_points_7d = np.zeros((points_xyz_world.shape[0], 7))
    if points_xyz_world.shape[0] > 0:
        colmap_points_7d[:, :3] = points_xyz_world
        colmap_points_7d[:, 3:6] = points_colors
        colmap_points_7d[:, 6] = points_idxs

    # One consistent schema for both the empty and the non-empty case; the
    # previous empty branch returned a different key set ('ids' only, no
    # camera/segmentation keys), which broke callers on empty clouds.
    return {'points_7d': colmap_points_7d,
            'ids': points_idxs,
            'imgs': points_imgs,
            'uv': points_uv,
            'all_imgs_ids': all_imgs_ids,
            'all_imgs_K': all_imgs_K,
            'all_imgs_R': all_imgs_R,
            'all_imgs_t': all_imgs_t,
            'ade': points_ade,
            'gestalt': points_gestalt}
1322
  def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config) -> Tuple[np.ndarray, List[int]]:
1323
  """
1324
  Predict 3D wireframe from a dataset entry.
 
1326
  good_entry = convert_entry_to_human_readable(entry)
1327
  colmap_rec = good_entry['colmap_binary']
1328
 
1329
+ colmap_pcloud = create_pcloud(colmap_rec, good_entry)
1330
+
1331
  vertex_threshold = config.get('vertex_threshold', 0.5)
1332
  edge_threshold = config.get('edge_threshold', 0.5)
1333
  only_predicted_connections = config.get('only_predicted_connections', False)
 
2161
  if not isinstance(gest_seg_np, np.ndarray):
2162
  gest_seg_np = np.array(gest_seg_np)
2163
 
 
 
 
 
 
 
 
 
2164
  H, W = gest_seg_np.shape[:2]
2165
 
2166
  # Get camera parameters from COLMAP reconstruction if not provided
train.py CHANGED
@@ -40,13 +40,13 @@ pnet_class_model = load_pointnet_class_model(model_path="/home/skvrnjan/personal
40
  #voxel_model = load_3dcnn_model(model_path="/home/skvrnjan/personal/hoho_voxel/initial_epoch_100.pth", device=device, predict_score=True)
41
  voxel_model = None
42
 
43
- config = {'vertex_threshold': 0.4, 'edge_threshold': 0.7, 'only_predicted_connections': False}
44
 
45
  idx = 0
46
  for a in tqdm(ds['train'], desc="Processing dataset"):
47
  #plot_all_modalities(a)
48
  #pred_vertices, pred_edges = predict_wireframe_old(a)
49
- #pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model)
50
  try:
51
  pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
52
  #pred_vertices, pred_edges = predict_wireframe_old(a)
 
40
  #voxel_model = load_3dcnn_model(model_path="/home/skvrnjan/personal/hoho_voxel/initial_epoch_100.pth", device=device, predict_score=True)
41
  voxel_model = None
42
 
43
+ config = {'vertex_threshold': 0.4, 'edge_threshold': 0.6, 'only_predicted_connections': False}
44
 
45
  idx = 0
46
  for a in tqdm(ds['train'], desc="Processing dataset"):
47
  #plot_all_modalities(a)
48
  #pred_vertices, pred_edges = predict_wireframe_old(a)
49
+ pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
50
  try:
51
  pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
52
  #pred_vertices, pred_edges = predict_wireframe_old(a)