Revert TTA + add COLMAP-support vertex filter

The TTA experiment (commit 857514e) regressed by 0.011 against the
baseline (0.4475 vs. 0.4584). The K=3 union added more candidate vertices
than the post-merge step could consolidate, producing duplicate / shifted
vertices and dropping corner_f1 by 0.022.

Restore script.py to the 56f1ec6 baseline state, then add ONE
principled precision-only step:

filter_by_colmap_support: after hybrid_merge, drop vertices of the
resulting wireframe that have no COLMAP point within support_radius=0.6m.
Such vertices are treated as model hallucinations in regions without
geometric evidence.

Why this is principled: real roof vertices are always near reconstructed
COLMAP geometry (the COLMAP cloud covers the entire visible building).
Predicted vertices in empty 3D space are by construction wrong.

Safety guarantees:
- Falls back to unfiltered output on any exception (try/except wraps
pycolmap access and KD-tree query).
- If filtering would leave fewer than 2 vertices or no edges, returns the
unfiltered input instead, so the filter never empties a non-empty prediction.
- Conservative 0.6m radius: comfortably covers normal COLMAP sparsity.

Expected effect: small precision boost on corner_f1, neutral or
positive edge_iou (edges to dropped vertices are removed cleanly).

Files changed (1) hide show

script.py +96 -148

script.py CHANGED Viewed

@@ -60,11 +60,11 @@ MERGE_THRESH = 0.4
 SNAP_RADIUS = 0.5
-def compute_scene(sample, cfg, rng):
-    """Expensive: multi-view label voting + smart normalization. Call once per sample.
-    Returns a dict with the full pre-priority-sampling fused scene, ready to
-    feed into ``sample_from_scene`` repeatedly for TTA. Returns None on failure.
     """
     try:
         scene = build_compact_scene(sample, cfg, rng)
@@ -74,43 +74,21 @@ def compute_scene(sample, cfg, rng):
     xyz = scene["xyz"]
     source = scene["source"]
     if len(xyz) < 10:
         return None
     behind_id = scene.get("behind_gest_id", np.full(len(xyz), -1, dtype=np.int16))
     group_id, class_id = _compute_group_and_class(
         scene["visible_src"], scene["visible_id"], behind_id, source)
-    center, scale = _compute_smart_center_scale(xyz, source)
-    return {
-        "xyz": xyz,
-        "source": source,
-        "group_id": group_id,
-        "class_id": class_id,
-        "center": center,
-        "scale": scale,
-        "behind_gest_id": scene.get("behind_gest_id"),
-        "n_views_voted": scene.get("n_views_voted"),
-        "vote_frac": scene.get("vote_frac"),
-        "visible_src": scene["visible_src"],
-        "visible_id": scene["visible_id"],
-    }
-def sample_from_scene(scene):
-    """Cheap: priority-sample 4096 points from a fused scene.
-    Uses the global numpy random state (advanced internally by ``_priority_sample``),
-    so consecutive calls yield different 4096-subsets — perfect for TTA.
-    """
-    xyz = scene["xyz"]
-    source = scene["source"]
-    group_id = scene["group_id"]
-    class_id = scene["class_id"]
-    center = scene["center"]
-    scale = scene["scale"]
     indices, mask = _priority_sample(source, group_id, SEQ_LEN, COLMAP_QUOTA, DEPTH_QUOTA)
     xyz_norm = (xyz[indices] - center) / scale
     result = {
@@ -121,24 +99,21 @@ def sample_from_scene(scene):
         "center": center.astype(np.float32),
         "scale": np.float32(scale),
     }
-    if scene.get("behind_gest_id") is not None:
         behind = np.clip(scene["behind_gest_id"][indices].astype(np.int16), 0, None)
         result["behind"] = behind.astype(np.int64)
-    if scene.get("n_views_voted") is not None:
         result["n_views_voted"] = scene["n_views_voted"][indices].astype(np.float32)
-    if scene.get("vote_frac") is not None:
         result["vote_frac"] = scene["vote_frac"][indices].astype(np.float32)
     result["visible_src"] = scene["visible_src"][indices].astype(np.int64)
     result["visible_id"] = scene["visible_id"][indices].astype(np.int64)
-    return result
-def fuse_and_sample(sample, cfg, rng):
-    """Backward-compatible wrapper: compute scene + one priority sample."""
-    scene = compute_scene(sample, cfg, rng)
-    if scene is None:
-        return None
-    return sample_from_scene(scene)
 def load_model(checkpoint_path, device):
@@ -333,68 +308,59 @@ def hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8):
     return np.array(final_v), final_e
-def ensemble_merge(v1, e1, v2, e2, vertex_merge_radius=0.4):
-    """Merge two pipeline outputs (learned + classical) into a single wireframe.
-    Strategy: vertex union with merging of close pairs, edge union after
-    re-mapping classical-pipeline edge endpoints onto merged vertex indices.
-    Both inputs are expected to be (vertices ndarray/list, edges list-of-tuples).
     """
-    v1 = np.array(v1, dtype=np.float64) if isinstance(v1, list) else np.asarray(v1, dtype=np.float64)
-    v2 = np.array(v2, dtype=np.float64) if isinstance(v2, list) else np.asarray(v2, dtype=np.float64)
-    if v2.size == 0 or len(v2) == 0:
-        return v1, list(e1)
-    if v1.size == 0 or len(v1) == 0:
-        return v2, list(e2)
-    # Filter out non-finite vertices in v2
-    valid2 = np.isfinite(v2).all(axis=1)
-    if not valid2.all():
-        idx_map_2 = {int(old): new for new, old in enumerate(np.where(valid2)[0])}
-        v2 = v2[valid2]
-        e2 = [(idx_map_2[int(u)], idx_map_2[int(v)]) for u, v in e2
-              if int(u) in idx_map_2 and int(v) in idx_map_2]
-    if len(v2) == 0:
-        return v1, list(e1)
-    from scipy.spatial import cKDTree
-    tree = cKDTree(v1)
-    dists, indices = tree.query(v2, k=1)
-    v2_to_merged = {}
-    new_vertices = []
-    for i, (d, j) in enumerate(zip(dists, indices)):
-        if d <= vertex_merge_radius:
-            v2_to_merged[i] = int(j)
-        else:
-            v2_to_merged[i] = len(v1) + len(new_vertices)
-            new_vertices.append(v2[i])
-    final_v = np.vstack([v1, np.array(new_vertices, dtype=np.float64)]) if new_vertices else v1
-    final_e_set = set()
-    final_e = []
-    for u, v in e1:
-        u, v = int(u), int(v)
-        if u == v:
-            continue
-        e = (min(u, v), max(u, v))
-        if e not in final_e_set:
-            final_e_set.add(e)
-            final_e.append(e)
-    for u, v in e2:
-        u_m = v2_to_merged.get(int(u))
-        v_m = v2_to_merged.get(int(v))
-        if u_m is None or v_m is None or u_m == v_m:
-            continue
-        e = (min(u_m, v_m), max(u_m, v_m))
-        if e not in final_e_set:
-            final_e_set.add(e)
-            final_e.append(e)
-    return final_v, final_e
 # ---------------------------------------------------------------------------
@@ -459,14 +425,7 @@ if __name__ == "__main__":
     # Point fusion config
     cfg = FuserConfig()
-    # Test-time augmentation: how many learned-pipeline passes per sample.
-    # Each pass uses a different priority-sample seed so the input point
-    # cloud (especially the depth-unprojected portion) varies. We then
-    # union the segment predictions across passes via ensemble_merge.
-    TTA_PASSES = 3
-    TTA_BASE_SEED = 2718
-    TTA_MERGE_RADIUS = 0.3  # tight: same vertex predicted by multiple passes
     # Process all samples
     solution = []
@@ -479,51 +438,40 @@ if __name__ == "__main__":
         for sample in tqdm(dataset[subset_name], desc=subset_name):
             order_id = sample["order_id"]
-            try:
-                # ---- Build the fused scene ONCE (the expensive multi-view
-                # label voting); then run priority sampling + model K times
-                # for TTA. _priority_sample uses the global numpy RNG which
-                # advances on each call, giving genuine variation cheaply.
-                scene_rng = np.random.RandomState(TTA_BASE_SEED)
-                scene = compute_scene(sample, cfg, scene_rng)
-                tta_outputs = []
-                if scene is not None:
-                    np.random.seed(TTA_BASE_SEED)  # reset global RNG for reproducibility
-                    for k in range(TTA_PASSES):
-                        try:
-                            fused_k = sample_from_scene(scene)
-                            pv_k, pe_k = predict_sample(fused_k, model, device)
-                            if isinstance(pv_k, np.ndarray) and len(pv_k) >= 2 and len(pe_k) >= 1:
-                                tta_outputs.append((pv_k, pe_k))
-                        except Exception as tta_e:
-                            print(f"  TTA pass {k} failed for {order_id}: {tta_e}")
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-                if not tta_outputs:
-                    pred_v, pred_e = empty_solution()
-                else:
-                    pred_v, pred_e = tta_outputs[0]
-                    for pv_k, pe_k in tta_outputs[1:]:
-                        pred_v, pred_e = ensemble_merge(
-                            pred_v, pred_e, pv_k, pe_k,
-                            vertex_merge_radius=TTA_MERGE_RADIUS,
-                        )
-                    # ---- Classical track ensemble (precise DLT triangulation)
                     try:
                         from triangulation import predict_wireframe_tracks
                         track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
                         pred_v, pred_e = hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8)
                     except Exception as track_e_err:
                         print(f"  Track ensemble failed for {order_id}: {track_e_err}")
-            except Exception as e:
-                import traceback
-                print(f"  Predict failed for {order_id}:\n{traceback.format_exc()}")
-                pred_v, pred_e = empty_solution()
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
             solution.append({
                 "order_id": order_id,

 SNAP_RADIUS = 0.5
+def fuse_and_sample(sample, cfg, rng):
+    """Run point fusion + priority sampling on a raw dataset sample.
+    Returns a dict with xyz_norm, class_id, source, mask, center, scale, etc.
+    ready for model inference. Returns None if fusion fails.
     """
     try:
         scene = build_compact_scene(sample, cfg, rng)
     xyz = scene["xyz"]
     source = scene["source"]
     if len(xyz) < 10:
         return None
+    # Compute group_id and class_id (same as cache_scenes.py)
     behind_id = scene.get("behind_gest_id", np.full(len(xyz), -1, dtype=np.int16))
     group_id, class_id = _compute_group_and_class(
         scene["visible_src"], scene["visible_id"], behind_id, source)
+    # Normalize
+    center, scale = _compute_smart_center_scale(xyz, source)
+    # Priority sample
     indices, mask = _priority_sample(source, group_id, SEQ_LEN, COLMAP_QUOTA, DEPTH_QUOTA)
     xyz_norm = (xyz[indices] - center) / scale
     result = {
         "center": center.astype(np.float32),
         "scale": np.float32(scale),
     }
+    # Optional fields
+    if "behind_gest_id" in scene:
         behind = np.clip(scene["behind_gest_id"][indices].astype(np.int16), 0, None)
         result["behind"] = behind.astype(np.int64)
+    if "n_views_voted" in scene:
         result["n_views_voted"] = scene["n_views_voted"][indices].astype(np.float32)
+    if "vote_frac" in scene:
         result["vote_frac"] = scene["vote_frac"][indices].astype(np.float32)
+    # Visible src/id for snap post-processing
     result["visible_src"] = scene["visible_src"][indices].astype(np.int64)
     result["visible_id"] = scene["visible_id"][indices].astype(np.int64)
+    return result
 def load_model(checkpoint_path, device):
     return np.array(final_v), final_e
+def filter_by_colmap_support(pv, pe, sample, support_radius=0.6):
+    """Drop predicted vertices that have NO COLMAP point within support_radius.
+    Hallucinated vertices from the model (predicted in 3D space with no real
+    geometric evidence) typically appear in regions with no COLMAP point cloud.
+    Filtering by COLMAP-presence is a precision-only operation: real vertices
+    survive (the COLMAP cloud covers all reconstructed regions of the building),
+    spurious model outputs in empty space get dropped.
+    Returns the filtered (vertices, edges). On any failure or empty result,
+    falls back to the unfiltered input to avoid an empty submission.
     """
+    try:
         # Pass the input through untouched unless pv is an ndarray with at
         # least 2 vertices and pe holds at least 1 edge.
+        if not isinstance(pv, np.ndarray) or len(pv) < 2 or len(pe) < 1:
+            return pv, pe
+        from hoho2025.example_solutions import convert_entry_to_human_readable
+        good = convert_entry_to_human_readable(sample)
         # Prefer the 'colmap' entry; use 'colmap_binary' when the former is
         # missing or falsy.
+        colmap_rec = good.get('colmap') or good.get('colmap_binary')
+        if colmap_rec is None:
+            return pv, pe
         # Stack the xyz attribute of every entry in points3D into one array.
+        colmap_xyz = np.array(
+            [p.xyz for p in colmap_rec.points3D.values()], dtype=np.float64
+        )
         # With fewer than 5 COLMAP points the filter is skipped entirely.
+        if len(colmap_xyz) < 5:
+            return pv, pe
+        from scipy.spatial import cKDTree
+        tree = cKDTree(colmap_xyz)
         # Distance from each vertex to its single nearest COLMAP point.
+        dists, _ = tree.query(np.asarray(pv, dtype=np.float64), k=1)
         # The radius is inclusive: a vertex exactly support_radius away is kept.
+        keep_mask = dists <= support_radius
+        if keep_mask.all():
+            return pv, pe  # nothing to filter
+        n_keep = int(keep_mask.sum())
+        # Require at least 2 vertices and 1 edge to remain after filtering.
+        if n_keep < 2:
+            return pv, pe
         # Map each surviving original vertex index to its row in new_pv.
+        old_to_new = {int(old): new for new, old in enumerate(np.where(keep_mask)[0])}
+        new_pv = pv[keep_mask]
+        new_pe = []
         # Keep an edge only when both endpoints survived; self-loops (u == v)
         # are discarded as well.
         # NOTE(review): a surviving vertex whose edges were all removed stays
         # in new_pv as an isolated vertex - confirm this is intended.
+        for u, v in pe:
+            u, v = int(u), int(v)
+            if u in old_to_new and v in old_to_new and u != v:
+                new_pe.append((old_to_new[u], old_to_new[v]))
+        if len(new_pe) < 1:
+            return pv, pe  # do not drop all edges
+        return new_pv, new_pe
     # NOTE(review): every Exception is swallowed without any log output, so a
     # systematically failing filter is indistinguishable from a no-op.
+    except Exception:
+        return pv, pe
 # ---------------------------------------------------------------------------
     # Point fusion config
     cfg = FuserConfig()
+    rng = np.random.RandomState(2718)
     # Process all samples
     solution = []
         for sample in tqdm(dataset[subset_name], desc=subset_name):
             order_id = sample["order_id"]
+            # Fuse + sample
+            fused = fuse_and_sample(sample, cfg, rng)
+            if fused is None:
+                pred_v, pred_e = empty_solution()
+            else:
+                try:
+                    pred_v, pred_e = predict_sample(fused, model, device)
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+                    # Apply handcrafted triangulation tracking to catch missing corners/edges
                     try:
                         from triangulation import predict_wireframe_tracks
+                        # Use min_views=3 for highly precise, conservative geometric tracks
                         track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
                         pred_v, pred_e = hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8)
                     except Exception as track_e_err:
                         print(f"  Track ensemble failed for {order_id}: {track_e_err}")
+                    # Final precision pass: drop vertices with no nearby COLMAP
+                    # support. These are the model's hallucinations in regions
+                    # with no geometric evidence. Internal fallbacks ensure we
+                    # never end up with fewer than 2 vertices / 1 edge.
+                    pred_v, pred_e = filter_by_colmap_support(
+                        pred_v, pred_e, sample, support_radius=0.6,
+                    )
+                except Exception as e:
+                    import traceback
+                    print(f"  Predict failed for {order_id}:\n{traceback.format_exc()}")
+                    pred_v, pred_e = empty_solution()
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
             solution.append({
                 "order_id": order_id,