TTA: 3-pass test-time augmentation with priority-sample seed variation

Replace the single learned-pipeline pass plus the classical sklearn
ensemble (whose introduction in commit 3118106 regressed the score by
0.018) with K=3 learned passes that share the same model checkpoint but
use different priority-sample RNG seeds (2718, 3718, 4718). Each pass
produces a different mix of depth-unprojected points, so the model sees
a slightly different input. Combining the predictions from several
perturbed inputs on a fixed checkpoint (test-time augmentation) is a
standard inference-time technique, typically expected to give a 1-3%
F1 gain.

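Aggregation: union the (vertices, edges) outputs of the passes via
ensemble_merge with a tight 0.3 m merge radius (a vertex predicted by
multiple passes is consolidated; pass-specific noise stays separate).
A pass is skipped if fusion returns nothing, if prediction raises, or
if it yields fewer than 2 vertices or no edges; if no pass produces a
usable output, the sample gets an empty solution.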

The classical predict_wireframe_tracks step (DLT triangulation) is
kept on top of the unioned learned output via hybrid_merge — this part
worked in the 0.4584 baseline and is unchanged.

Removed: the sklearn-classical ensemble (predict_wireframe_sklearn run
with USE_TRACK_ENSEMBLE, USE_TRACKS_AS_VERTICES, USE_WINNER_CANDIDATES
and USE_LINE_EDGES disabled). It added noise, originating in its 2D
semantic vertex detection, that the learned pipeline had not produced.

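Runtime: 3 learned forward passes per sample instead of 1. Each pass
took ~1s, so we expect ~3s/sample x 100 samples = ~300s in total for
the learned passes, i.e. ~200s more than the single-pass baseline
(before counting the time saved by removing the sklearn ensemble).
Well within the 2h budget.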

Files changed (1) hide show

script.py +42 -58

script.py CHANGED Viewed

@@ -434,7 +434,14 @@ if __name__ == "__main__":
     # Point fusion config
     cfg = FuserConfig()
-    rng = np.random.RandomState(2718)
     # Process all samples
     solution = []
@@ -447,70 +454,47 @@ if __name__ == "__main__":
         for sample in tqdm(dataset[subset_name], desc=subset_name):
             order_id = sample["order_id"]
-            # Fuse + sample
-            fused = fuse_and_sample(sample, cfg, rng)
-            if fused is None:
-                pred_v, pred_e = empty_solution()
-            else:
-                try:
-                    pred_v, pred_e = predict_sample(fused, model, device)
-                    if torch.cuda.is_available():
-                        torch.cuda.empty_cache()
-                    # Apply handcrafted triangulation tracking to catch missing corners/edges
                     try:
                         from triangulation import predict_wireframe_tracks
-                        # Use min_views=3 for highly precise, conservative geometric tracks
                         track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
                         pred_v, pred_e = hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8)
                     except Exception as track_e_err:
                         print(f"  Track ensemble failed for {order_id}: {track_e_err}")
-                    # Pipeline ensemble: also run the classical sklearn pipeline
-                    # but disable its heaviest features. predict_wireframe_tracks
-                    # is already called above by hybrid_merge — calling it again
-                    # via USE_TRACK_ENSEMBLE/USE_TRACKS_AS_VERTICES doubles the
-                    # slowest step and causes timeout. Winner candidates and line
-                    # cloud are also expensive. We keep just the core classical
-                    # signal: 2D semantic vertex detection + RANSAC depth fit +
-                    # 3D unprojection + sklearn edge classifier.
-                    try:
-                        import sklearn_submission as _skl
-                        _saved_flags = (
-                            _skl.USE_TRACK_ENSEMBLE,
-                            _skl.USE_TRACKS_AS_VERTICES,
-                            _skl.USE_WINNER_CANDIDATES,
-                            _skl.USE_LINE_EDGES,
-                        )
-                        _skl.USE_TRACK_ENSEMBLE = False
-                        _skl.USE_TRACKS_AS_VERTICES = False
-                        _skl.USE_WINNER_CANDIDATES = False
-                        _skl.USE_LINE_EDGES = False
-                        try:
-                            skl_v, skl_e = _skl.predict_wireframe_sklearn(sample)
-                        finally:
-                            (
-                                _skl.USE_TRACK_ENSEMBLE,
-                                _skl.USE_TRACKS_AS_VERTICES,
-                                _skl.USE_WINNER_CANDIDATES,
-                                _skl.USE_LINE_EDGES,
-                            ) = _saved_flags
-                        if isinstance(pred_v, np.ndarray) and len(pred_v) >= 1 and \
-                                skl_v is not None and len(skl_v) >= 1:
-                            pred_v, pred_e = ensemble_merge(
-                                pred_v, pred_e, skl_v, skl_e,
-                                vertex_merge_radius=0.4,
-                            )
-                    except Exception as ens_err:
-                        print(f"  Ensemble merge failed for {order_id}: {ens_err}")
-                except Exception as e:
-                    import traceback
-                    print(f"  Predict failed for {order_id}:\n{traceback.format_exc()}")
-                    pred_v, pred_e = empty_solution()
-                    if torch.cuda.is_available():
-                        torch.cuda.empty_cache()
             solution.append({
                 "order_id": order_id,

     # Point fusion config
     cfg = FuserConfig()
+    # Test-time augmentation: how many learned-pipeline passes per sample.
+    # Each pass uses a different priority-sample seed so the input point
+    # cloud (especially the depth-unprojected portion) varies. We then
+    # union the segment predictions across passes via ensemble_merge.
+    TTA_PASSES = 3
+    TTA_BASE_SEED = 2718
+    TTA_MERGE_RADIUS = 0.3  # tight: same vertex predicted by multiple passes
     # Process all samples
     solution = []
         for sample in tqdm(dataset[subset_name], desc=subset_name):
             order_id = sample["order_id"]
+            try:
+                # ---- TTA: run the learned pipeline K times, union outputs
+                tta_outputs = []
+                for k in range(TTA_PASSES):
+                    rng_k = np.random.RandomState(TTA_BASE_SEED + k * 1000)
+                    fused_k = fuse_and_sample(sample, cfg, rng_k)
+                    if fused_k is None:
+                        continue
+                    try:
+                        pv_k, pe_k = predict_sample(fused_k, model, device)
+                        if isinstance(pv_k, np.ndarray) and len(pv_k) >= 2 and len(pe_k) >= 1:
+                            tta_outputs.append((pv_k, pe_k))
+                    except Exception as tta_e:
+                        print(f"  TTA pass {k} failed for {order_id}: {tta_e}")
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                if not tta_outputs:
+                    pred_v, pred_e = empty_solution()
+                else:
+                    pred_v, pred_e = tta_outputs[0]
+                    for pv_k, pe_k in tta_outputs[1:]:
+                        pred_v, pred_e = ensemble_merge(
+                            pred_v, pred_e, pv_k, pe_k,
+                            vertex_merge_radius=TTA_MERGE_RADIUS,
+                        )
+                    # ---- Classical track ensemble (precise DLT triangulation)
                     try:
                         from triangulation import predict_wireframe_tracks
                         track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
                         pred_v, pred_e = hybrid_merge(pred_v, pred_e, track_v, track_e, merge_radius=0.8)
                     except Exception as track_e_err:
                         print(f"  Track ensemble failed for {order_id}: {track_e_err}")
+            except Exception as e:
+                import traceback
+                print(f"  Predict failed for {order_id}:\n{traceback.format_exc()}")
+                pred_v, pred_e = empty_solution()
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
             solution.append({
                 "order_id": order_id,