xsponenta committed on
Commit
6ecd5f8
·
1 Parent(s): 857514e

Revert TTA + add COLMAP-support vertex filter

Browse files

The TTA experiment (commit 857514e) regressed by 0.011 vs. the baseline
(0.4475 vs. 0.4584). The K=3 union added more candidate vertices
than the post-merge could consolidate, producing duplicate / shifted
vertices and dropping corner_f1 by 0.022.

Restore script.py to the 56f1ec6 baseline state, then add ONE
principled precision-only step:

filter_by_colmap_support: after hybrid_merge, drop predicted vertices
that have no COLMAP point within support_radius=0.6m. These are
model hallucinations in regions without geometric evidence.

Why this is principled: real roof vertices are always near reconstructed
COLMAP geometry (the COLMAP cloud covers the entire visible building).
Predicted vertices in empty 3D space are by construction wrong.

Safety guarantees:
- Falls back to unfiltered output on any exception (try/except wraps
pycolmap access and KD-tree query).
- If filtering would leave fewer than 2 vertices or no edges, returns the
unfiltered prediction instead, so the filter never empties a submission.
- Conservative 0.6m radius: comfortably covers normal COLMAP sparsity.

Expected effect: small precision boost on corner_f1, neutral or
positive edge_iou (edges to dropped vertices are removed cleanly).

Files changed (1) hide show
  1. script.py +96 -148
script.py CHANGED
@@ -60,11 +60,11 @@ MERGE_THRESH = 0.4
60
  SNAP_RADIUS = 0.5
61
 
62
 
63
- def compute_scene(sample, cfg, rng):
64
- """Expensive: multi-view label voting + smart normalization. Call once per sample.
65
 
66
- Returns a dict with the full pre-priority-sampling fused scene, ready to
67
- feed into ``sample_from_scene`` repeatedly for TTA. Returns None on failure.
68
  """
69
  try:
70
  scene = build_compact_scene(sample, cfg, rng)
@@ -74,43 +74,21 @@ def compute_scene(sample, cfg, rng):
74
 
75
  xyz = scene["xyz"]
76
  source = scene["source"]
 
77
  if len(xyz) < 10:
78
  return None
79
 
 
80
  behind_id = scene.get("behind_gest_id", np.full(len(xyz), -1, dtype=np.int16))
81
  group_id, class_id = _compute_group_and_class(
82
  scene["visible_src"], scene["visible_id"], behind_id, source)
83
- center, scale = _compute_smart_center_scale(xyz, source)
84
-
85
- return {
86
- "xyz": xyz,
87
- "source": source,
88
- "group_id": group_id,
89
- "class_id": class_id,
90
- "center": center,
91
- "scale": scale,
92
- "behind_gest_id": scene.get("behind_gest_id"),
93
- "n_views_voted": scene.get("n_views_voted"),
94
- "vote_frac": scene.get("vote_frac"),
95
- "visible_src": scene["visible_src"],
96
- "visible_id": scene["visible_id"],
97
- }
98
 
 
 
99
 
100
- def sample_from_scene(scene):
101
- """Cheap: priority-sample 4096 points from a fused scene.
102
-
103
- Uses the global numpy random state (advanced internally by ``_priority_sample``),
104
- so consecutive calls yield different 4096-subsets — perfect for TTA.
105
- """
106
- xyz = scene["xyz"]
107
- source = scene["source"]
108
- group_id = scene["group_id"]
109
- class_id = scene["class_id"]
110
- center = scene["center"]
111
- scale = scene["scale"]
112
-
113
  indices, mask = _priority_sample(source, group_id, SEQ_LEN, COLMAP_QUOTA, DEPTH_QUOTA)
 
114
  xyz_norm = (xyz[indices] - center) / scale
115
 
116
  result = {
@@ -121,24 +99,21 @@ def sample_from_scene(scene):
121
  "center": center.astype(np.float32),
122
  "scale": np.float32(scale),
123
  }
124
- if scene.get("behind_gest_id") is not None:
 
 
125
  behind = np.clip(scene["behind_gest_id"][indices].astype(np.int16), 0, None)
126
  result["behind"] = behind.astype(np.int64)
127
- if scene.get("n_views_voted") is not None:
128
  result["n_views_voted"] = scene["n_views_voted"][indices].astype(np.float32)
129
- if scene.get("vote_frac") is not None:
130
  result["vote_frac"] = scene["vote_frac"][indices].astype(np.float32)
 
 
131
  result["visible_src"] = scene["visible_src"][indices].astype(np.int64)
132
  result["visible_id"] = scene["visible_id"][indices].astype(np.int64)
133
- return result
134
 
135
-
136
- def fuse_and_sample(sample, cfg, rng):
137
- """Backward-compatible wrapper: compute scene + one priority sample."""
138
- scene = compute_scene(sample, cfg, rng)
139
- if scene is None:
140
- return None
141
- return sample_from_scene(scene)
142
 
143
 
144
  def load_model(checkpoint_path, device):
@@ -333,68 +308,59 @@ def hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8):
333
  return np.array(final_v), final_e
334
 
335
 
336
- def ensemble_merge(v1, e1, v2, e2, vertex_merge_radius=0.4):
337
- """Merge two pipeline outputs (learned + classical) into a single wireframe.
338
 
339
- Strategy: vertex union with merging of close pairs, edge union after
340
- re-mapping classical-pipeline edge endpoints onto merged vertex indices.
 
 
 
341
 
342
- Both inputs are expected to be (vertices ndarray/list, edges list-of-tuples).
 
343
  """
344
- v1 = np.array(v1, dtype=np.float64) if isinstance(v1, list) else np.asarray(v1, dtype=np.float64)
345
- v2 = np.array(v2, dtype=np.float64) if isinstance(v2, list) else np.asarray(v2, dtype=np.float64)
346
-
347
- if v2.size == 0 or len(v2) == 0:
348
- return v1, list(e1)
349
- if v1.size == 0 or len(v1) == 0:
350
- return v2, list(e2)
351
-
352
- # Filter out non-finite vertices in v2
353
- valid2 = np.isfinite(v2).all(axis=1)
354
- if not valid2.all():
355
- idx_map_2 = {int(old): new for new, old in enumerate(np.where(valid2)[0])}
356
- v2 = v2[valid2]
357
- e2 = [(idx_map_2[int(u)], idx_map_2[int(v)]) for u, v in e2
358
- if int(u) in idx_map_2 and int(v) in idx_map_2]
359
-
360
- if len(v2) == 0:
361
- return v1, list(e1)
362
-
363
- from scipy.spatial import cKDTree
364
- tree = cKDTree(v1)
365
- dists, indices = tree.query(v2, k=1)
366
-
367
- v2_to_merged = {}
368
- new_vertices = []
369
- for i, (d, j) in enumerate(zip(dists, indices)):
370
- if d <= vertex_merge_radius:
371
- v2_to_merged[i] = int(j)
372
- else:
373
- v2_to_merged[i] = len(v1) + len(new_vertices)
374
- new_vertices.append(v2[i])
375
-
376
- final_v = np.vstack([v1, np.array(new_vertices, dtype=np.float64)]) if new_vertices else v1
377
- final_e_set = set()
378
- final_e = []
379
- for u, v in e1:
380
- u, v = int(u), int(v)
381
- if u == v:
382
- continue
383
- e = (min(u, v), max(u, v))
384
- if e not in final_e_set:
385
- final_e_set.add(e)
386
- final_e.append(e)
387
- for u, v in e2:
388
- u_m = v2_to_merged.get(int(u))
389
- v_m = v2_to_merged.get(int(v))
390
- if u_m is None or v_m is None or u_m == v_m:
391
- continue
392
- e = (min(u_m, v_m), max(u_m, v_m))
393
- if e not in final_e_set:
394
- final_e_set.add(e)
395
- final_e.append(e)
396
-
397
- return final_v, final_e
398
 
399
 
400
  # ---------------------------------------------------------------------------
@@ -459,14 +425,7 @@ if __name__ == "__main__":
459
 
460
  # Point fusion config
461
  cfg = FuserConfig()
462
-
463
- # Test-time augmentation: how many learned-pipeline passes per sample.
464
- # Each pass uses a different priority-sample seed so the input point
465
- # cloud (especially the depth-unprojected portion) varies. We then
466
- # union the segment predictions across passes via ensemble_merge.
467
- TTA_PASSES = 3
468
- TTA_BASE_SEED = 2718
469
- TTA_MERGE_RADIUS = 0.3 # tight: same vertex predicted by multiple passes
470
 
471
  # Process all samples
472
  solution = []
@@ -479,51 +438,40 @@ if __name__ == "__main__":
479
  for sample in tqdm(dataset[subset_name], desc=subset_name):
480
  order_id = sample["order_id"]
481
 
482
- try:
483
- # ---- Build the fused scene ONCE (the expensive multi-view
484
- # label voting); then run priority sampling + model K times
485
- # for TTA. _priority_sample uses the global numpy RNG which
486
- # advances on each call, giving genuine variation cheaply.
487
- scene_rng = np.random.RandomState(TTA_BASE_SEED)
488
- scene = compute_scene(sample, cfg, scene_rng)
489
- tta_outputs = []
490
- if scene is not None:
491
- np.random.seed(TTA_BASE_SEED) # reset global RNG for reproducibility
492
- for k in range(TTA_PASSES):
493
- try:
494
- fused_k = sample_from_scene(scene)
495
- pv_k, pe_k = predict_sample(fused_k, model, device)
496
- if isinstance(pv_k, np.ndarray) and len(pv_k) >= 2 and len(pe_k) >= 1:
497
- tta_outputs.append((pv_k, pe_k))
498
- except Exception as tta_e:
499
- print(f" TTA pass {k} failed for {order_id}: {tta_e}")
500
- if torch.cuda.is_available():
501
- torch.cuda.empty_cache()
502
-
503
- if not tta_outputs:
504
- pred_v, pred_e = empty_solution()
505
- else:
506
- pred_v, pred_e = tta_outputs[0]
507
- for pv_k, pe_k in tta_outputs[1:]:
508
- pred_v, pred_e = ensemble_merge(
509
- pred_v, pred_e, pv_k, pe_k,
510
- vertex_merge_radius=TTA_MERGE_RADIUS,
511
- )
512
-
513
- # ---- Classical track ensemble (precise DLT triangulation)
514
  try:
515
  from triangulation import predict_wireframe_tracks
 
516
  track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
 
517
  pred_v, pred_e = hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8)
518
  except Exception as track_e_err:
519
  print(f" Track ensemble failed for {order_id}: {track_e_err}")
520
 
521
- except Exception as e:
522
- import traceback
523
- print(f" Predict failed for {order_id}:\n{traceback.format_exc()}")
524
- pred_v, pred_e = empty_solution()
525
- if torch.cuda.is_available():
526
- torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
527
 
528
  solution.append({
529
  "order_id": order_id,
 
60
  SNAP_RADIUS = 0.5
61
 
62
 
63
+ def fuse_and_sample(sample, cfg, rng):
64
+ """Run point fusion + priority sampling on a raw dataset sample.
65
 
66
+ Returns a dict with xyz_norm, class_id, source, mask, center, scale, etc.
67
+ ready for model inference. Returns None if fusion fails.
68
  """
69
  try:
70
  scene = build_compact_scene(sample, cfg, rng)
 
74
 
75
  xyz = scene["xyz"]
76
  source = scene["source"]
77
+
78
  if len(xyz) < 10:
79
  return None
80
 
81
+ # Compute group_id and class_id (same as cache_scenes.py)
82
  behind_id = scene.get("behind_gest_id", np.full(len(xyz), -1, dtype=np.int16))
83
  group_id, class_id = _compute_group_and_class(
84
  scene["visible_src"], scene["visible_id"], behind_id, source)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ # Normalize
87
+ center, scale = _compute_smart_center_scale(xyz, source)
88
 
89
+ # Priority sample
 
 
 
 
 
 
 
 
 
 
 
 
90
  indices, mask = _priority_sample(source, group_id, SEQ_LEN, COLMAP_QUOTA, DEPTH_QUOTA)
91
+
92
  xyz_norm = (xyz[indices] - center) / scale
93
 
94
  result = {
 
99
  "center": center.astype(np.float32),
100
  "scale": np.float32(scale),
101
  }
102
+
103
+ # Optional fields
104
+ if "behind_gest_id" in scene:
105
  behind = np.clip(scene["behind_gest_id"][indices].astype(np.int16), 0, None)
106
  result["behind"] = behind.astype(np.int64)
107
+ if "n_views_voted" in scene:
108
  result["n_views_voted"] = scene["n_views_voted"][indices].astype(np.float32)
109
+ if "vote_frac" in scene:
110
  result["vote_frac"] = scene["vote_frac"][indices].astype(np.float32)
111
+
112
+ # Visible src/id for snap post-processing
113
  result["visible_src"] = scene["visible_src"][indices].astype(np.int64)
114
  result["visible_id"] = scene["visible_id"][indices].astype(np.int64)
 
115
 
116
+ return result
 
 
 
 
 
 
117
 
118
 
119
  def load_model(checkpoint_path, device):
 
308
  return np.array(final_v), final_e
309
 
310
 
311
def filter_by_colmap_support(pv, pe, sample, support_radius=0.6, colmap_xyz=None):
    """Drop predicted vertices that have no COLMAP point within ``support_radius``.

    A vertex is kept when its nearest COLMAP 3D point lies at most
    ``support_radius`` away. Edges are kept only when both endpoints survive,
    and their indices are remapped onto the filtered vertex array.

    Args:
        pv: Predicted vertices. Must be a 2-D ``np.ndarray`` with one vertex
            per row; any other type is returned untouched.
        pe: Predicted edges as an iterable of ``(u, v)`` index pairs into ``pv``.
        sample: Raw dataset sample. Only used (through
            ``hoho2025.example_solutions.convert_entry_to_human_readable``)
            when ``colmap_xyz`` is not supplied.
        support_radius: Maximum distance from a vertex to its nearest COLMAP
            point for the vertex to count as supported.
        colmap_xyz: Optional array-like of COLMAP point coordinates, one point
            per row. When given, ``sample`` is not parsed at all, which lets
            callers that already hold the cloud avoid a second conversion.

    Returns:
        ``(vertices, edges)``. The *original* ``pv`` / ``pe`` objects are
        returned unchanged when nothing needs filtering, when fewer than five
        finite COLMAP points are available, when filtering would leave fewer
        than two vertices or no edges, or when any exception occurs.
    """
    try:
        if not isinstance(pv, np.ndarray) or len(pv) < 2 or len(pe) < 1:
            return pv, pe

        if colmap_xyz is None:
            # Imported lazily so a missing package only disables this filter
            # (handled by the except below) instead of breaking the script.
            from hoho2025.example_solutions import convert_entry_to_human_readable
            good = convert_entry_to_human_readable(sample)
            colmap_rec = good.get('colmap') or good.get('colmap_binary')
            if colmap_rec is None:
                return pv, pe
            colmap_xyz = [p.xyz for p in colmap_rec.points3D.values()]

        cloud = np.asarray(colmap_xyz, dtype=np.float64)
        if cloud.ndim != 2:
            return pv, pe  # empty or malformed cloud: nothing to compare against
        # A single NaN/inf point must not disable the whole filter, so
        # non-finite cloud points are ignored before building the tree.
        cloud = cloud[np.isfinite(cloud).all(axis=1)]
        if len(cloud) < 5:
            return pv, pe

        from scipy.spatial import cKDTree
        verts = np.asarray(pv, dtype=np.float64)
        finite = np.isfinite(verts).all(axis=1)
        # Non-finite vertices can never be supported: give them an infinite
        # distance instead of passing them to the KD-tree query.
        dists = np.full(len(verts), np.inf, dtype=np.float64)
        if finite.any():
            dists[finite], _ = cKDTree(cloud).query(verts[finite], k=1)
        keep_mask = dists <= support_radius

        if keep_mask.all():
            return pv, pe  # nothing to filter

        # Require at least 2 vertices to remain after filtering.
        if int(keep_mask.sum()) < 2:
            return pv, pe

        old_to_new = {int(old): new for new, old in enumerate(np.where(keep_mask)[0])}
        new_pv = pv[keep_mask]
        new_pe = []
        for u, v in pe:
            u, v = int(u), int(v)
            if u in old_to_new and v in old_to_new and u != v:
                new_pe.append((old_to_new[u], old_to_new[v]))

        if len(new_pe) < 1:
            return pv, pe  # do not drop all edges

        return new_pv, new_pe
    except Exception as err:
        # Best-effort step: never let it break a submission, but do report
        # why it was skipped so a systematic failure is visible in the logs.
        print(f" COLMAP support filter skipped: {err!r}")
        return pv, pe
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
 
366
  # ---------------------------------------------------------------------------
 
425
 
426
  # Point fusion config
427
  cfg = FuserConfig()
428
+ rng = np.random.RandomState(2718)
 
 
 
 
 
 
 
429
 
430
  # Process all samples
431
  solution = []
 
438
  for sample in tqdm(dataset[subset_name], desc=subset_name):
439
  order_id = sample["order_id"]
440
 
441
+ # Fuse + sample
442
+ fused = fuse_and_sample(sample, cfg, rng)
443
+ if fused is None:
444
+ pred_v, pred_e = empty_solution()
445
+ else:
446
+ try:
447
+ pred_v, pred_e = predict_sample(fused, model, device)
448
+ if torch.cuda.is_available():
449
+ torch.cuda.empty_cache()
450
+
451
+ # Apply handcrafted triangulation tracking to catch missing corners/edges
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  try:
453
  from triangulation import predict_wireframe_tracks
454
+ # Use min_views=3 for highly precise, conservative geometric tracks
455
  track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
456
+
457
  pred_v, pred_e = hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8)
458
  except Exception as track_e_err:
459
  print(f" Track ensemble failed for {order_id}: {track_e_err}")
460
 
461
+ # Final precision pass: drop vertices with no nearby COLMAP
462
+ # support. These are the model's hallucinations in regions
463
+ # with no geometric evidence. Internal fallbacks ensure we
464
+ # never end up with fewer than 2 vertices / 1 edge.
465
+ pred_v, pred_e = filter_by_colmap_support(
466
+ pred_v, pred_e, sample, support_radius=0.6,
467
+ )
468
+
469
+ except Exception as e:
470
+ import traceback
471
+ print(f" Predict failed for {order_id}:\n{traceback.format_exc()}")
472
+ pred_v, pred_e = empty_solution()
473
+ if torch.cuda.is_available():
474
+ torch.cuda.empty_cache()
475
 
476
  solution.append({
477
  "order_id": order_id,