dmytromishkin committed on
Commit
d7ef9b2
·
unverified ·
1 Parent(s): dd47670

bugfixes, move setup.py to pyproject.toml

Browse files
hoho2025/example_solutions.py CHANGED
@@ -8,7 +8,6 @@ import cv2
8
  import numpy as np
9
  import pycolmap
10
  from PIL import Image as PImage
11
- from scipy.spatial.distance import cdist
12
 
13
  from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
14
 
@@ -19,10 +18,10 @@ def empty_solution():
19
 
20
 
21
  def read_colmap_rec(colmap_data):
 
22
  with tempfile.TemporaryDirectory() as tmpdir:
23
  with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
24
- zf.extractall(tmpdir) # unpacks cameras.txt, images.txt, etc. to tmpdir
25
- # Now parse with pycolmap
26
  rec = pycolmap.Reconstruction(tmpdir)
27
  return rec
28
 
@@ -58,14 +57,21 @@ def _colmap_project_point(img, cam, xyz):
58
  return (u, v), p_cam[2]
59
 
60
  def convert_entry_to_human_readable(entry):
 
 
 
 
 
 
 
61
  out = {}
62
  for k, v in entry.items():
63
- if 'colmap' in k and k!= 'pose_only_in_colmap':
64
  out[k] = read_colmap_rec(v)
65
- elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't', 'depth']:
66
  out[k] = np.array(v)
67
  else:
68
- out[k]=v
69
  out['__key__'] = entry['order_id']
70
  return out
71
 
@@ -107,116 +113,79 @@ def point_to_segment_dist(pt, seg_p1, seg_p2):
107
 
108
 
109
  def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
 
 
 
 
 
 
 
 
 
110
  """
111
- Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
112
- For each connected component, we do a line fit with cv2.fitLine, then measure
113
- segment endpoints more robustly. We then associate apex points that are within
114
- 'edge_th' of the line segment. We record those apex–apex connections for edges
115
- if at least 2 apexes lie near the same component line.
116
- """
117
- #--------------------------------------------------------------------------------
118
- # Step A: Collect apex and eave_end vertices
119
- #--------------------------------------------------------------------------------
120
  if not isinstance(gest_seg_np, np.ndarray):
121
  gest_seg_np = np.array(gest_seg_np)
 
 
122
  vertices = []
123
- # Apex
124
- apex_color = np.array(gestalt_color_mapping['apex'])
125
- apex_mask = cv2.inRange(gest_seg_np, apex_color-0.5, apex_color+0.5)
126
- if apex_mask.sum() > 0:
127
- output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
128
- (numLabels, labels, stats, centroids) = output
129
- stats, centroids = stats[1:], centroids[1:] # skip background
130
- for i in range(numLabels-1):
131
- vert = {"xy": centroids[i], "type": "apex"}
132
- vertices.append(vert)
133
-
134
- # Eave end
135
- eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
136
- eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-0.5, eave_end_color+0.5)
137
- if eave_end_mask.sum() > 0:
138
- output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
139
- (numLabels, labels, stats, centroids) = output
140
- stats, centroids = stats[1:], centroids[1:]
141
- for i in range(numLabels-1):
142
- vert = {"xy": centroids[i], "type": "eave_end_point"}
143
- vertices.append(vert)
144
-
145
- # Consolidate apex points as array:
146
- apex_pts = []
147
- apex_idx_map = [] # keep track of index in 'vertices'
148
- for idx, v in enumerate(vertices):
149
- apex_pts.append(v['xy'])
150
- apex_idx_map.append(idx)
151
- apex_pts = np.array(apex_pts)
152
 
 
153
  connections = []
154
  edge_classes = ['eave', 'ridge', 'rake', 'valley']
155
  for edge_class in edge_classes:
156
  edge_color = np.array(gestalt_color_mapping[edge_class])
157
- mask_raw = cv2.inRange(gest_seg_np, edge_color-0.5, edge_color+0.5)
158
- # Possibly do morphological open/close to avoid merges or small holes
159
- kernel = np.ones((5, 5), np.uint8) # smaller kernel to reduce over-merge
160
- mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, kernel)
161
  if mask.sum() == 0:
162
  continue
163
 
164
- # Connected components
165
- output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
166
- (numLabels, labels, stats, centroids) = output
167
- # skip the background
168
- stats, centroids = stats[1:], centroids[1:]
169
- label_indices = range(1, numLabels)
170
 
171
- # For each connected component, do a line fit
172
- for lbl in label_indices:
173
  ys, xs = np.where(labels == lbl)
174
  if len(xs) < 2:
175
  continue
176
- # Fit a line using cv2.fitLine
 
177
  pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
178
- # (vx, vy, x0, y0) = direction + a point on the line
179
- line_params = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
180
  param=0, reps=0.01, aeps=0.01)
181
  vx, vy, x0, y0 = line_params.ravel()
182
- # We'll approximate endpoints by projecting (xs, ys) onto the line,
183
- # then taking min and max in the 1D param along the line.
184
-
185
- # param along the line = ( (x - x0)*vx + (y - y0)*vy )
186
- proj = ( (xs - x0)*vx + (ys - y0)*vy )
187
- proj_min, proj_max = proj.min(), proj.max()
188
- p1 = np.array([x0 + proj_min*vx, y0 + proj_min*vy])
189
- p2 = np.array([x0 + proj_max*vx, y0 + proj_max*vy])
190
-
191
- #--------------------------------------------------------------------------------
192
- # Step C: If apex points are within 'edge_th' of segment, they are connected
193
- #--------------------------------------------------------------------------------
194
  if len(apex_pts) < 2:
195
  continue
196
 
197
- # Distance from each apex to the line segment
198
- dists = np.array([
199
- point_to_segment_dist(apex_pts[i], p1, p2)
200
- for i in range(len(apex_pts))
201
- ])
202
-
203
- # Indices of apex points that are near
204
- near_mask = (dists <= edge_th)
205
- near_indices = np.where(near_mask)[0]
206
  if len(near_indices) < 2:
207
  continue
208
 
209
- # Connect each pair among these near apex points
210
  for i in range(len(near_indices)):
211
- for j in range(i+1, len(near_indices)):
212
- a_idx = near_indices[i]
213
- b_idx = near_indices[j]
214
- # 'a_idx' and 'b_idx' are indices in apex_pts / apex_idx_map
215
- vA = apex_idx_map[a_idx]
216
- vB = apex_idx_map[b_idx]
217
- # Store the connection using sorted indexing
218
- conn = tuple(sorted((vA, vB)))
219
- connections.append(conn)
220
 
221
  return vertices, connections
222
 
@@ -254,49 +223,32 @@ def get_uv_depth(vertices: List[dict],
254
  Depth value chosen for each vertex.
255
  """
256
 
257
- # Collect each vertex's (x, y)
258
  uv = np.array([vert['xy'] for vert in vertices], dtype=np.float32)
259
-
260
- # Convert to integer pixel coordinates (round or floor)
261
  uv_int = np.round(uv).astype(np.int32)
262
  H, W = depth_fitted.shape[:2]
263
-
264
- # Clip coordinates to stay within image bounds
265
  uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
266
  uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)
267
-
268
- # Prepare output array of depths
269
  vertex_depth = np.zeros(len(vertices), dtype=np.float32)
270
  dense_count = 0
271
-
272
  for i, (x_i, y_i) in enumerate(uv_int):
273
- # Local region in [x_i - search_radius, x_i + search_radius]
274
  x0 = max(0, x_i - search_radius)
275
  x1 = min(W, x_i + search_radius + 1)
276
  y0 = max(0, y_i - search_radius)
277
  y1 = min(H, y_i + search_radius + 1)
278
-
279
- # Crop out the local window in sparse_depth
280
  region = sparse_depth[y0:y1, x0:x1]
281
-
282
- # Find all valid (non-zero) depths
283
- valid_mask = (region > 0)
284
- valid_y, valid_x = np.where(valid_mask)
285
-
286
  if valid_y.size > 0:
287
- # Compute global coordinates for each valid pixel
 
288
  global_x = x0 + valid_x
289
  global_y = y0 + valid_y
290
-
291
- # Compute squared distance to center (x_i, y_i)
292
  dist_sq = (global_x - x_i)**2 + (global_y - y_i)**2
293
-
294
- # Find the nearest valid pixel
295
  min_idx = np.argmin(dist_sq)
296
- nearest_depth = region[valid_y[min_idx], valid_x[min_idx]]
297
- vertex_depth[i] = nearest_depth
298
  else:
299
- # Fallback to the dense depth
300
  vertex_depth[i] = depth_fitted[y_i, x_i]
301
  dense_count += 1
302
  return uv, vertex_depth
@@ -408,48 +360,49 @@ def create_3d_wireframe_single_image(vertices: List[dict],
408
 
409
 
410
  def merge_vertices_3d(vert_edge_per_image, th=0.5):
411
- '''Merge vertices that are close to each other in 3D space and are of same types'''
412
- # Initialize structures to collect vertices and connections from all images
 
 
 
 
 
 
 
413
  all_3d_vertices = []
414
  connections_3d = []
415
- all_indexes = []
416
  cur_start = 0
417
  types = []
418
-
419
- # Combine vertices and update connection indices across all images
420
  for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
421
- types += [int(v['type']=='apex') for v in vertices]
422
  all_3d_vertices.append(vertices_3d)
423
- connections_3d+=[(x+cur_start,y+cur_start) for (x,y) in connections]
424
- cur_start+=len(vertices_3d)
425
  all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
426
-
427
- # Calculate distance matrix between all vertices
428
- distmat = cdist(all_3d_vertices, all_3d_vertices)
429
- types = np.array(types).reshape(-1,1)
430
- same_types = cdist(types, types)
431
-
432
- # Create mask for vertices that should be merged (close in space and same type)
433
- mask_to_merge = (distmat <= th) & (same_types==0)
434
  new_vertices = []
435
  new_connections = []
436
-
437
- # Extract vertex indices to merge based on the mask
 
 
438
  to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
439
-
440
- # Build groups of vertices to merge (transitive grouping)
441
  to_merge_final = defaultdict(list)
442
  for i in range(len(all_3d_vertices)):
443
  for j in to_merge:
444
  if i in j:
445
- to_merge_final[i]+=j
446
-
447
- # Remove duplicates in each group
448
  for k, v in to_merge_final.items():
449
  to_merge_final[k] = list(set(v))
450
-
451
- # Create final merge groups without duplicates
452
- already_there = set()
453
  merged = []
454
  for k, v in to_merge_final.items():
455
  if k in already_there:
@@ -457,18 +410,14 @@ def merge_vertices_3d(vert_edge_per_image, th=0.5):
457
  merged.append(v)
458
  for vv in v:
459
  already_there.add(vv)
460
-
461
- # Calculate new vertex positions (average of merged groups)
462
  old_idx_to_new = {}
463
- count=0
464
- for idxs in merged:
465
  new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
466
  for idx in idxs:
467
  old_idx_to_new[idx] = count
468
- count +=1
469
- new_vertices=np.array(new_vertices)
470
-
471
- # Update connections to use new vertex indices
472
  for conn in connections_3d:
473
  new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
474
  if new_con[0] == new_con[1]:
@@ -608,17 +557,21 @@ def get_sparse_depth(colmap_rec, img_id_substring, depth):
608
 
609
 
610
  def fit_scale_robust_median(depth, sparse_depth, validity_mask=None):
611
- """
612
- Fit a scale factor to the depth map using the median of the ratio of sparse to dense depth.
 
 
 
 
613
  """
614
  if validity_mask is None:
615
  mask = (sparse_depth != 0)
616
  else:
617
  mask = (sparse_depth != 0) & validity_mask
618
- mask = mask & (depth <50) & (sparse_depth <50)
619
  X = depth[mask]
620
  Y = sparse_depth[mask]
621
- alpha =np.median(Y/X)
622
  depth_fitted = alpha * depth
623
  return alpha, depth_fitted
624
 
@@ -630,20 +583,14 @@ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg):
630
 
631
  Parameters
632
  ----------
633
- depth : np.ndarray
634
- Initial dense depth map (H, W).
635
  colmap_rec : pycolmap.Reconstruction
636
- COLMAP reconstruction data.
637
  img_id : str
638
- Identifier for the current image within the COLMAP reconstruction.
639
- K : np.ndarray
640
- Camera intrinsic matrix (3x3).
641
- R : np.ndarray
642
- Camera rotation matrix (3x3).
643
- t : np.ndarray
644
- Camera translation vector (3,).
645
  ade20k_seg : PIL.Image
646
- ADE20k segmentation map for the image.
647
 
648
  Returns
649
  -------
@@ -673,17 +620,18 @@ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg):
673
  return depth_fitted, depth_sparse, True, col_img
674
 
675
 
676
- def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th = 3.0):
677
- """
678
- Prune vertices that are too far from sparse point cloud
679
-
 
680
  """
681
- xyz_sfm=[]
682
  for k, v in colmap_rec.points3D.items():
683
  xyz_sfm.append(v.xyz)
684
  xyz_sfm = np.array(xyz_sfm)
685
- distmat = cdist(all_3d_vertices, xyz_sfm)
686
- mindist = distmat.min(axis=1)
687
  mask = mindist <= th
688
  all_3d_vertices_new = all_3d_vertices[mask]
689
  old_idx_survived = np.arange(len(all_3d_vertices))[mask]
@@ -699,24 +647,15 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
699
  """
700
  good_entry = convert_entry_to_human_readable(entry)
701
  vert_edge_per_image = {}
702
- for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
703
- good_entry['depth'],
704
- good_entry['K'],
705
- good_entry['R'],
706
- good_entry['t'],
707
  good_entry['image_ids'],
708
- good_entry['ade'] # Added ade20k segmentation
709
  )):
710
  if 'colmap' in good_entry:
711
  colmap_rec = good_entry['colmap']
712
  else:
713
  colmap_rec = good_entry['colmap_binary']
714
- process_image = True
715
- if 'pose_only_in_colmap' in good_entry:
716
- process_image = good_entry['pose_only_in_colmap'][i]
717
- K = np.array(K)
718
- R = np.array(R)
719
- t = np.array(t)
720
  # Resize gestalt segmentation to match depth map size
721
  depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
722
  gest_seg = gest.resize(depth_size)
 
8
  import numpy as np
9
  import pycolmap
10
  from PIL import Image as PImage
 
11
 
12
  from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
13
 
 
18
 
19
 
20
  def read_colmap_rec(colmap_data):
21
+ """Decode COLMAP reconstruction from the in-memory zip bytes stored in the dataset."""
22
  with tempfile.TemporaryDirectory() as tmpdir:
23
  with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
24
+ zf.extractall(tmpdir)
 
25
  rec = pycolmap.Reconstruction(tmpdir)
26
  return rec
27
 
 
57
  return (u, v), p_cam[2]
58
 
59
  def convert_entry_to_human_readable(entry):
60
+ """Decode raw dataset fields into usable Python objects.
61
+
62
+ COLMAP fields (zipped bytes) → pycolmap.Reconstruction.
63
+ Geometry fields (wf_vertices, wf_edges, K, R, t) → numpy arrays.
64
+ Note: K/R/t here are the dataset's BPO camera parameters, separate from
65
+ the camera model stored inside the COLMAP reconstruction.
66
+ """
67
  out = {}
68
  for k, v in entry.items():
69
+ if 'colmap' in k and k != 'pose_only_in_colmap':
70
  out[k] = read_colmap_rec(v)
71
+ elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't']:
72
  out[k] = np.array(v)
73
  else:
74
+ out[k] = v
75
  out['__key__'] = entry['order_id']
76
  return out
77
 
 
113
 
114
 
115
  def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
116
+ """Extract 2D wireframe vertices and edges from a gestalt segmentation map.
117
+
118
+ Gestalt marks vertex locations (apex, eave-end) as colored pixel blobs and
119
+ edge classes (eave, ridge, rake, valley) as thick painted strokes.
120
+
121
+ Vertex strategy: centroid of each connected blob → one vertex.
122
+ Edge strategy: fit a line to each connected stroke component, then connect
123
+ any two vertex blobs that both lie within `edge_th` pixels of that line.
124
+ We never use the fitted line's own endpoints as vertices.
125
  """
 
 
 
 
 
 
 
 
 
126
  if not isinstance(gest_seg_np, np.ndarray):
127
  gest_seg_np = np.array(gest_seg_np)
128
+
129
+ # --- Collect vertices from blob centroids ---
130
  vertices = []
131
+ for v_class, v_type in [('apex', 'apex'), ('eave_end_point', 'eave_end_point')]:
132
+ color = np.array(gestalt_color_mapping[v_class])
133
+ mask = cv2.inRange(gest_seg_np, color - 0.5, color + 0.5)
134
+ if mask.sum() == 0:
135
+ continue
136
+ _, _, _, centroids = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
137
+ for centroid in centroids[1:]: # skip background label
138
+ vertices.append({"xy": centroid, "type": v_type})
139
+
140
+ # Flat list of all vertex positions for distance queries below.
141
+ apex_pts = np.array([v['xy'] for v in vertices])
142
+ apex_idx_map = list(range(len(vertices)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
+ # --- Collect edges by fitting lines to stroke components ---
145
  connections = []
146
  edge_classes = ['eave', 'ridge', 'rake', 'valley']
147
  for edge_class in edge_classes:
148
  edge_color = np.array(gestalt_color_mapping[edge_class])
149
+ mask_raw = cv2.inRange(gest_seg_np, edge_color - 0.5, edge_color + 0.5)
150
+ # Morphological close bridges small gaps without over-merging distinct strokes.
151
+ mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))
 
152
  if mask.sum() == 0:
153
  continue
154
 
155
+ _, labels, _, _ = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
 
 
 
 
 
156
 
157
+ for lbl in range(1, labels.max() + 1):
 
158
  ys, xs = np.where(labels == lbl)
159
  if len(xs) < 2:
160
  continue
161
+
162
+ # Fit a line to the stroke pixels: returns (vx, vy, x0, y0).
163
  pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
164
+ line_params = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
 
165
  param=0, reps=0.01, aeps=0.01)
166
  vx, vy, x0, y0 = line_params.ravel()
167
+
168
+ # Recover the segment extent by projecting all stroke pixels onto the line
169
+ # and taking the extremes.
170
+ proj = (xs - x0) * vx + (ys - y0) * vy
171
+ p1 = np.array([x0 + proj.min() * vx, y0 + proj.min() * vy])
172
+ p2 = np.array([x0 + proj.max() * vx, y0 + proj.max() * vy])
173
+
 
 
 
 
 
174
  if len(apex_pts) < 2:
175
  continue
176
 
177
+ # Connect vertex blobs that lie close to this fitted segment.
178
+ dists = np.array([point_to_segment_dist(apex_pts[i], p1, p2)
179
+ for i in range(len(apex_pts))])
180
+ near_indices = np.where(dists <= edge_th)[0]
 
 
 
 
 
181
  if len(near_indices) < 2:
182
  continue
183
 
 
184
  for i in range(len(near_indices)):
185
+ for j in range(i + 1, len(near_indices)):
186
+ vA = apex_idx_map[near_indices[i]]
187
+ vB = apex_idx_map[near_indices[j]]
188
+ connections.append(tuple(sorted((vA, vB))))
 
 
 
 
 
189
 
190
  return vertices, connections
191
 
 
223
  Depth value chosen for each vertex.
224
  """
225
 
 
226
  uv = np.array([vert['xy'] for vert in vertices], dtype=np.float32)
 
 
227
  uv_int = np.round(uv).astype(np.int32)
228
  H, W = depth_fitted.shape[:2]
 
 
229
  uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
230
  uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)
231
+
 
232
  vertex_depth = np.zeros(len(vertices), dtype=np.float32)
233
  dense_count = 0
234
+
235
  for i, (x_i, y_i) in enumerate(uv_int):
 
236
  x0 = max(0, x_i - search_radius)
237
  x1 = min(W, x_i + search_radius + 1)
238
  y0 = max(0, y_i - search_radius)
239
  y1 = min(H, y_i + search_radius + 1)
 
 
240
  region = sparse_depth[y0:y1, x0:x1]
241
+ valid_y, valid_x = np.where(region > 0)
242
+
 
 
 
243
  if valid_y.size > 0:
244
+ # Prefer the COLMAP sparse point nearest to the vertex over dense depth,
245
+ # since COLMAP depth is closer to GT metric while the dense map scale might be wrong.
246
  global_x = x0 + valid_x
247
  global_y = y0 + valid_y
 
 
248
  dist_sq = (global_x - x_i)**2 + (global_y - y_i)**2
 
 
249
  min_idx = np.argmin(dist_sq)
250
+ vertex_depth[i] = region[valid_y[min_idx], valid_x[min_idx]]
 
251
  else:
 
252
  vertex_depth[i] = depth_fitted[y_i, x_i]
253
  dense_count += 1
254
  return uv, vertex_depth
 
360
 
361
 
362
  def merge_vertices_3d(vert_edge_per_image, th=0.5):
363
+ """Merge vertices across all views into a single consistent 3D vertex set.
364
+
365
+ The same physical corner is independently lifted to 3D from each image that
366
+ sees it, producing near-duplicate vertices. We merge any two vertices of the
367
+ same type whose 3D distance is within `th` metres.
368
+
369
+ Grouping is transitive: if A~B and B~C then all three collapse to one vertex
370
+ at their mean position.
371
+ """
372
  all_3d_vertices = []
373
  connections_3d = []
 
374
  cur_start = 0
375
  types = []
376
+
 
377
  for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
378
+ types += [int(v['type'] == 'apex') for v in vertices]
379
  all_3d_vertices.append(vertices_3d)
380
+ connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
381
+ cur_start += len(vertices_3d)
382
  all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
383
+
384
+ diff = all_3d_vertices[:, None, :] - all_3d_vertices[None, :, :]
385
+ distmat = np.sqrt((diff ** 2).sum(axis=-1))
386
+ types = np.array(types)
387
+ same_types_mask = (types[:, None] == types[None, :])
388
+ mask_to_merge = (distmat <= th) & same_types_mask
389
+
 
390
  new_vertices = []
391
  new_connections = []
392
+
393
+ # Each row of mask_to_merge gives the set of vertices close to vertex i.
394
+ # Collect unique such sets, then union-find style: every vertex collects all
395
+ # other vertices it co-occurs with in any group.
396
  to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
 
 
397
  to_merge_final = defaultdict(list)
398
  for i in range(len(all_3d_vertices)):
399
  for j in to_merge:
400
  if i in j:
401
+ to_merge_final[i] += j
 
 
402
  for k, v in to_merge_final.items():
403
  to_merge_final[k] = list(set(v))
404
+
405
+ already_there = set()
 
406
  merged = []
407
  for k, v in to_merge_final.items():
408
  if k in already_there:
 
410
  merged.append(v)
411
  for vv in v:
412
  already_there.add(vv)
413
+
 
414
  old_idx_to_new = {}
415
+ for count, idxs in enumerate(merged):
 
416
  new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
417
  for idx in idxs:
418
  old_idx_to_new[idx] = count
419
+ new_vertices = np.array(new_vertices)
420
+
 
 
421
  for conn in connections_3d:
422
  new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
423
  if new_con[0] == new_con[1]:
 
557
 
558
 
559
  def fit_scale_robust_median(depth, sparse_depth, validity_mask=None):
560
+ """Recover the absolute scale of a monocular depth map using COLMAP sparse points.
561
+
562
+ Monocular depth is scale-ambiguous; COLMAP gives metric depth at sparse locations.
563
+ We estimate a single scale factor alpha = median(sparse / dense) over pixels where
564
+ both are available. The 50 m cap excludes sky / background points that would skew
565
+ the ratio toward large values.
566
  """
567
  if validity_mask is None:
568
  mask = (sparse_depth != 0)
569
  else:
570
  mask = (sparse_depth != 0) & validity_mask
571
+ mask = mask & (depth < 50) & (sparse_depth < 50)
572
  X = depth[mask]
573
  Y = sparse_depth[mask]
574
+ alpha = np.median(Y / X)
575
  depth_fitted = alpha * depth
576
  return alpha, depth_fitted
577
 
 
583
 
584
  Parameters
585
  ----------
586
+ depth : PIL.Image
587
+ Dense monocular depth map (pixel values in mm).
588
  colmap_rec : pycolmap.Reconstruction
589
+ COLMAP reconstruction used to obtain metric sparse depth.
590
  img_id : str
591
+ Substring matched against COLMAP image names to locate the right camera.
 
 
 
 
 
 
592
  ade20k_seg : PIL.Image
593
+ ADE20k segmentation used to restrict scale fitting to the building region.
594
 
595
  Returns
596
  -------
 
620
  return depth_fitted, depth_sparse, True, col_img
621
 
622
 
623
+ def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th=3.0):
624
+ """Remove vertices that have no SfM support within `th` metres.
625
+
626
+ Vertices lifted from noisy monocular depth with no nearby COLMAP point are
627
+ likely hallucinations; discarding them improves geometric accuracy.
628
  """
629
+ xyz_sfm = []
630
  for k, v in colmap_rec.points3D.items():
631
  xyz_sfm.append(v.xyz)
632
  xyz_sfm = np.array(xyz_sfm)
633
+ diff = all_3d_vertices[:, None, :] - xyz_sfm[None, :, :]
634
+ mindist = np.sqrt((diff ** 2).sum(axis=-1)).min(axis=1)
635
  mask = mindist <= th
636
  all_3d_vertices_new = all_3d_vertices[mask]
637
  old_idx_survived = np.arange(len(all_3d_vertices))[mask]
 
647
  """
648
  good_entry = convert_entry_to_human_readable(entry)
649
  vert_edge_per_image = {}
650
+ for i, (gest, depth, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
651
+ good_entry['depth'],
 
 
 
652
  good_entry['image_ids'],
653
+ good_entry['ade']
654
  )):
655
  if 'colmap' in good_entry:
656
  colmap_rec = good_entry['colmap']
657
  else:
658
  colmap_rec = good_entry['colmap_binary']
 
 
 
 
 
 
659
  # Resize gestalt segmentation to match depth map size
660
  depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
661
  gest_seg = gest.resize(depth_size)
hoho2025/metric_helper.py CHANGED
@@ -1,5 +1,4 @@
1
  import numpy as np
2
- from scipy.spatial.distance import cdist
3
  from scipy.optimize import linear_sum_assignment
4
  import torch
5
  import trimesh
@@ -135,7 +134,8 @@ def compute_ap_metrics(pd_vertices, gt_vertices, thresh=25):
135
  if len(pd_vertices) == 0 or len(gt_vertices) == 0:
136
  return 0.0
137
 
138
- dists = cdist(pd_vertices, gt_vertices)
 
139
  row_ind, col_ind = linear_sum_assignment(dists)
140
 
141
  tp = (dists[row_ind, col_ind] <= thresh).sum()
 
1
  import numpy as np
 
2
  from scipy.optimize import linear_sum_assignment
3
  import torch
4
  import trimesh
 
134
  if len(pd_vertices) == 0 or len(gt_vertices) == 0:
135
  return 0.0
136
 
137
+ diff = np.asarray(pd_vertices)[:, None, :] - np.asarray(gt_vertices)[None, :, :]
138
+ dists = np.sqrt((diff ** 2).sum(axis=-1))
139
  row_ind, col_ind = linear_sum_assignment(dists)
140
 
141
  tp = (dists[row_ind, col_ind] <= thresh).sum()
pyproject.toml CHANGED
@@ -1,3 +1,33 @@
1
  [build-system]
2
  requires = ["setuptools>=42", "wheel"]
3
  build-backend = "setuptools.build_meta"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  [build-system]
2
  requires = ["setuptools>=42", "wheel"]
3
  build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hoho2025"
7
+ version = "0.2.2"
8
+ description = "Tools and utilities for the HoHo Dataset and S23DR Competition"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ authors = [
12
+ { name = "Jack Langerman, Dmytro Mishkin, S23DR Organizing Team", email = "hoho@jackml.com" },
13
+ ]
14
+ urls = { homepage = "https://github.com/s23dr/hoho2025" }
15
+ dependencies = [
16
+ "datasets",
17
+ "huggingface-hub",
18
+ "ipywidgets",
19
+ "matplotlib",
20
+ "numpy",
21
+ "opencv-python",
22
+ "Pillow",
23
+ "plotly",
24
+ "pycolmap>=0.6",
25
+ "scipy",
26
+ "torch",
27
+ "trimesh",
28
+ "webdataset",
29
+ "manifold3d",
30
+ ]
31
+
32
+ [tool.setuptools.packages.find]
33
+ where = ["."]
setup.py DELETED
@@ -1,37 +0,0 @@
1
- from setuptools import setup, find_packages
2
- import glob
3
- import os
4
-
5
- # Try to read from requirements.txt, but have fallback
6
- try:
7
- here = os.path.abspath(os.path.dirname(__file__))
8
- with open(os.path.join(here, 'requirements.txt')) as f:
9
- required = f.read().splitlines()
10
- except FileNotFoundError:
11
- # Fallback to hardcoded dependencies
12
- required = [
13
- 'datasets',
14
- 'huggingface-hub',
15
- 'ipywidgets',
16
- 'matplotlib',
17
- 'numpy',
18
- 'opencv-python',
19
- 'Pillow',
20
- 'plotly',
21
- 'pycolmap',
22
- 'scipy',
23
- 'torch',
24
- 'trimesh',
25
- 'webdataset==0.2.111',
26
- ]
27
-
28
- setup(name='hoho2025',
29
- version='0.2.1',
30
- description='Tools and utilites for the HoHo Dataset and S23DR Competition',
31
- url='https://github.com/s23dr/hoho2025',
32
- author='Jack Langerman, Dmytro Mishkin, S23DR Orgainizing Team',
33
- author_email='hoho@jackml.com',
34
- install_requires=required,
35
- packages=find_packages(),
36
- python_requires='>=3.10',
37
- include_package_data=True)