| """Local HSS evaluation harness for S23DR submissions. |
| |
| Streams N samples from the trainval dataset, runs the full pipeline |
| (fuse → predict → triangulation → 2D filter), computes HSS against the |
| ground truth, and reports mean / quartiles plus per-sample DIAG lines. |
| |
| Use this to validate any change BEFORE pushing to the leaderboard: |
| |
| python local_eval.py # default 50 samples |
| python local_eval.py 100 # 100 samples |
| python local_eval.py 100 --no-filter # skip the 2D edge filter |
| """ |
| import os |
| os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' |
|
|
| import sys |
| import time |
| import argparse |
| from pathlib import Path |
|
|
| SCRIPT_DIR = Path(__file__).resolve().parent |
| sys.path.insert(0, str(SCRIPT_DIR)) |
|
|
| import numpy as np |
| import torch |
| from datasets import load_dataset |
| from hoho2025.metric_helper import hss |
|
|
| import script |
| from s23dr_2026_example.point_fusion import FuserConfig |
|
|
|
|
def parse_args(argv=None):
    """Parse command-line options for the local evaluation harness.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so existing
            ``parse_args()`` callers behave exactly as before. Passing an
            explicit list lets other code drive the harness without
            touching ``sys.argv``.

    Returns:
        argparse.Namespace with one attribute per option below. Note that
        ``tta_seeds`` and ``fallback_to_tracks_when`` are returned as the raw
        comma-separated strings; the caller is responsible for splitting them.
    """
    p = argparse.ArgumentParser()

    # --- run size, labelling, and the basic A/B switches -------------------
    p.add_argument("n_samples", type=int, nargs="?", default=50,
                   help="number of samples to evaluate")
    p.add_argument("--no-filter", action="store_true",
                   help="disable the 2D edge filter (compare A/B)")
    p.add_argument("--orphan-only", action="store_true",
                   help="skip 2D edge filter, apply only orphan-vertex cleanup")
    p.add_argument("--strict-no-support", action="store_true",
                   help="use the asymmetric filter: drop only edges with NO support in any view")
    p.add_argument("--no-tracks", action="store_true",
                   help="disable the triangulation track ensemble")
    p.add_argument("--seed", type=int, default=2718,
                   help="rng seed for point fusion priority sampling")
    p.add_argument("--label", type=str, default="run",
                   help="label printed in summary line")
    p.add_argument("--conf-thresh", type=float, default=None,
                   help="override CONF_THRESH in script.py for this run")
    p.add_argument("--snap-apex", action="store_true",
                   help="extend snap_to_point_cloud target_classes to include apex (class 0)")

    # --- vertex view-projection refinement ---------------------------------
    p.add_argument("--vertex-refine", action="store_true",
                   help="apply vertex view-projection refinement after all post-process")
    p.add_argument("--refine-max-pixel-dist", type=float, default=15.0,
                   help="vertex refine: max 2D pixel distance for corner matching")
    p.add_argument("--refine-min-views", type=int, default=2,
                   help="vertex refine: min views with 2D match")
    p.add_argument("--refine-max-move", type=float, default=0.5,
                   help="vertex refine: max 3D displacement in meters")

    # --- test-time augmentation --------------------------------------------
    p.add_argument("--tta", action="store_true",
                   help="enable multi-seed TTA (3 priority-sample seeds, concat segments)")
    p.add_argument("--tta-hungarian", action="store_true",
                   help="use Hungarian-matched averaging TTA (rejects unmatched segments)")
    p.add_argument("--tta-min-passes", type=int, default=1,
                   help="hungarian TTA: drop anchor segments without this many supporting passes")
    p.add_argument("--tta-seeds", type=str, default="2718,31415,42",
                   help="comma-separated priority-sample seeds for TTA")

    # --- triangulation-track overrides -------------------------------------
    p.add_argument("--tracks-only", action="store_true",
                   help="output ONLY the triangulation tracks (debug: baseline of tracks alone)")
    p.add_argument("--fallback-to-tracks-when", type=str, default="",
                   help="fallback to tracks-only when pred_v > X and track_v < Y, e.g. 20,8")

    # --- hallucination filter ----------------------------------------------
    p.add_argument("--hallu-filter", action="store_true",
                   help="filter vertices lacking BOTH COLMAP and gestalt-corner support")
    p.add_argument("--hallu-colmap-radius", type=float, default=0.8,
                   help="hallucination filter: COLMAP support radius (meters)")
    p.add_argument("--hallu-gestalt-px", type=float, default=20.0,
                   help="hallucination filter: gestalt corner pixel radius")
    p.add_argument("--hallu-min-views", type=int, default=1,
                   help="hallucination filter: min views with gestalt support")

    # --- checkpoint ensemble -----------------------------------------------
    p.add_argument("--ensemble", type=str, default="",
                   help="comma-separated paths to ensemble checkpoints (additional to default checkpoint.pt)")
    p.add_argument("--ensemble-min-passes", type=int, default=1,
                   help="ensemble: min cross-pass agreement to keep anchor segments")

    # --- wireframe bundle adjustment ---------------------------------------
    p.add_argument("--bundle-adjust", action="store_true",
                   help="apply joint multi-view wireframe bundle adjustment")
    p.add_argument("--ba-iter", type=int, default=50,
                   help="bundle-adjust Adam iterations")
    p.add_argument("--ba-lr", type=float, default=0.003,
                   help="bundle-adjust learning rate")
    p.add_argument("--ba-vertex-weight", type=float, default=1.0,
                   help="bundle-adjust vertex (corner pixel) loss weight")
    p.add_argument("--ba-edge-weight", type=float, default=0.5,
                   help="bundle-adjust edge (edge pixel) loss weight")
    p.add_argument("--ba-anchor-weight", type=float, default=200.0,
                   help="bundle-adjust anchor regularization weight")
    p.add_argument("--ba-max-move", type=float, default=0.4,
                   help="bundle-adjust hard cap on vertex displacement (meters)")

    # --- loose-track supplement for sparse predictions ---------------------
    p.add_argument("--tri-supplement", action="store_true",
                   help="supplement sparse predictions with loose (min_views=2) tracks")
    p.add_argument("--tri-sparse-threshold", type=int, default=5,
                   help="tri-supplement: only activate when pred has < N vertices")
    p.add_argument("--tri-merge-radius", type=float, default=0.7,
                   help="tri-supplement: absorb loose vertex into pred if within radius")

    # --- learned edge / vertex models --------------------------------------
    p.add_argument("--edge-classifier", type=str, default="",
                   help="path to edge_classifier.pt; if set, filter edges via learned model")
    p.add_argument("--edge-classifier-v2", type=str, default="",
                   help="path to edge_classifier_v2.pt; if set, use v2 with image-mask features")
    p.add_argument("--edge-classifier-v3", type=str, default="",
                   help="path to edge_classifier_v3.pt; v3 = CNN patches + v2 features")
    p.add_argument("--edge-classifier-v4", type=str, default="",
                   help="path to edge_classifier_v4.pt; v4 = DINOv2 features + v2 features")
    p.add_argument("--vertex-regressor-v4", type=str, default="",
                   help="path to vertex_regressor_v4.pt; learned 3D position refinement")
    p.add_argument("--vertex-reg-max-move", type=float, default=0.4,
                   help="vertex regressor: clamp predicted offset to this magnitude")
    p.add_argument("--vertex-classifier-v4", type=str, default="",
                   help="path to vertex_classifier_v4.pt; drops low-conf vertices")
    p.add_argument("--vertex-class-thresh", type=float, default=0.3,
                   help="vertex classifier: keep if P(keep) >= threshold")
    p.add_argument("--vertex-class-min-keep", type=float, default=0.85,
                   help="vertex classifier: never drop more than (1 - this) of vertices")
    p.add_argument("--edge-class-thresh", type=float, default=0.5,
                   help="edge classifier: keep edges with P(keep) >= threshold")
    p.add_argument("--edge-class-min-keep", type=float, default=0.5,
                   help="edge classifier: never drop more than (1 - this) of edges")

    # --- edge filling from 2D evidence -------------------------------------
    p.add_argument("--edge-fill", action="store_true",
                   help="enable edge filling from 2D mask evidence")
    p.add_argument("--fill-min-views", type=int, default=2,
                   help="edge fill: min views supporting a new edge")
    p.add_argument("--fill-min-frac", type=float, default=0.40,
                   help="edge fill: min support fraction along projected segment")
    p.add_argument("--fill-max-length", type=float, default=5.0,
                   help="edge fill: max edge length in meters")

    # argv=None makes argparse fall back to sys.argv[1:].
    return p.parse_args(argv)
|
|
|
|
def _safe_len(seq):
    """Return ``len(seq)``, or 0 when *seq* has no ``__len__`` (e.g. ``None``)."""
    return len(seq) if hasattr(seq, '__len__') else 0


def _usable_tracks(track_v, track_e):
    """Return True when the track wireframe has >= 2 vertices and >= 1 edge."""
    return (track_v is not None and track_e is not None
            and len(track_v) >= 2 and len(track_e) >= 1)


def predict_one(sample, model, device, cfg, rng,
                use_tracks=True, use_2d_filter=True, orphan_only=False,
                strict_no_support=False, vertex_refine=False,
                refine_kwargs=None, edge_fill=False, fill_kwargs=None,
                tta=False, tta_seeds=None,
                tracks_only=False, fallback_tracks=None):
    """Run the full inference pipeline on one sample. Returns (pv, pe, diag).

    Stages, in order: base prediction (ensemble, TTA, or single fuse+predict
    pass) -> triangulation-track merge -> loose-track supplement -> vertex
    view-projection refinement -> one 2D edge filter variant -> vertex
    regressor -> vertex classifier -> edge classifier -> edge fill -> bundle
    adjustment -> hallucination filter -> tracks-only / fallback override.

    A failure in the base prediction returns ``script.empty_solution()``
    immediately. A failure in any later stage is recorded in
    ``diag["status"]`` (a later failure overwrites an earlier one) and the
    pipeline carries on with the prediction it had before that stage.

    Args:
        sample: Dataset entry passed through to every stage.
        model: Model used by the single-pass and TTA paths.
        device: Device handed to the prediction functions.
        cfg: Fusion config handed to ``script.fuse_and_sample`` / TTA / ensemble.
        rng: Random state used by ``script.fuse_and_sample``.
        use_tracks: Merge triangulation tracks into the prediction.
        use_2d_filter: Apply ``filter_edges_by_2d_support`` (only consulted
            when neither ``orphan_only`` nor ``strict_no_support`` is set).
        orphan_only: Apply only ``drop_orphan_vertices`` instead of a 2D filter.
        strict_no_support: Apply ``filter_edges_strict_no_support`` instead.
        vertex_refine: Apply ``refine_vertices_view_projection``.
        refine_kwargs: Extra keyword arguments for the vertex refinement.
        edge_fill: Apply ``fill_missing_edges_from_2d``.
        fill_kwargs: Extra keyword arguments for the edge fill.
        tta: Use multi-seed TTA (also selects the seed list for the ensemble).
        tta_seeds: Seeds for TTA; falls back to ``(2718, 31415, 42)``.
        tracks_only: Replace the final prediction with the raw tracks when
            they contain at least 2 vertices and 1 edge.
        fallback_tracks: Optional ``(pred_v_thresh, track_v_thresh)``; the
            prediction is replaced by the tracks when it has more than
            ``pred_v_thresh`` vertices and the tracks have fewer than
            ``track_v_thresh`` vertices (and are usable).

    Additional switches are read from attributes set on this function by the
    caller: ``_ensemble_models``, ``_ensemble_min_passes``, ``_tta_hungarian``,
    ``_tta_min_passes``, ``_tri_supplement``, ``_tri_sparse_threshold``,
    ``_tri_merge_radius``, ``_vertex_regressor``, ``_vertex_classifier``,
    ``_edge_classifier``, ``_bundle_adjust``, ``_ba_kwargs``, ``_hallu_filter``,
    ``_hallu_colmap_radius``, ``_hallu_gestalt_px``, ``_hallu_min_views``.
    Each one is optional and the corresponding stage is skipped when unset.

    Returns:
        ``(pred_v, pred_e, diag)`` where ``diag`` is a dict of per-stage
        counters plus a ``"status"`` string (``"ok"`` when nothing failed).
    """
    diag = {"colmap": -1, "fused": 0, "track_v": 0, "track_e": 0,
            "pred_v": 0, "pred_e": 0, "2dfilt_in": 0, "2dfilt_out": 0,
            "status": "ok"}

    # Best-effort COLMAP point count, used for diagnostics only. On any
    # failure the -1 sentinel stays in place, so the error is deliberately
    # not propagated or recorded in the status.
    try:
        from hoho2025.example_solutions import convert_entry_to_human_readable
        g = convert_entry_to_human_readable(sample)
        rec = g.get('colmap') or g.get('colmap_binary')
        if rec is not None:
            diag["colmap"] = len(rec.points3D)
    except Exception:
        pass

    # ---- base prediction: ensemble > TTA > single pass --------------------
    ensemble_models = getattr(predict_one, "_ensemble_models", None)
    if ensemble_models:
        try:
            from ensemble import predict_sample_ensemble

            if tta:
                seeds = tta_seeds or (2718, 31415, 42)
            else:
                seeds = (2718,)
            pred_v, pred_e = predict_sample_ensemble(
                sample, cfg, ensemble_models, device,
                seeds=tuple(seeds),
                min_passes_for_keep=getattr(predict_one, "_ensemble_min_passes", 1),
            )
            # -1 marks "no single fused point count" for multi-pass paths.
            diag["fused"] = -1
        except Exception as e:
            diag["status"] = f"ensemble_failed:{type(e).__name__}"
            return *script.empty_solution(), diag
    elif tta:
        try:
            seeds = tta_seeds or (2718, 31415, 42)
            use_hungarian = getattr(predict_one, "_tta_hungarian", False)
            import tta as _tta_mod
            if use_hungarian:
                pred_v, pred_e = _tta_mod.predict_sample_tta_hungarian(
                    sample, cfg, model, device, seeds=tuple(seeds),
                    min_passes_for_keep=getattr(predict_one, "_tta_min_passes", 1),
                )
            else:
                pred_v, pred_e = _tta_mod.predict_sample_tta(
                    sample, cfg, model, device, seeds=tuple(seeds))
            diag["fused"] = -1
        except Exception as e:
            diag["status"] = f"tta_failed:{type(e).__name__}"
            return *script.empty_solution(), diag
    else:
        fused = script.fuse_and_sample(sample, cfg, rng)
        if fused is None:
            diag["status"] = "fuse_failed"
            return *script.empty_solution(), diag
        diag["fused"] = len(fused["xyz_norm"])

        try:
            pred_v, pred_e = script.predict_sample(fused, model, device)
        except Exception as e:
            diag["status"] = f"predict_failed:{type(e).__name__}"
            return *script.empty_solution(), diag

    # ---- triangulation-track merge ----------------------------------------
    # Remember whether diag["track_v"] holds a real measurement; the fallback
    # rule below must not mistake the initial 0 for "few tracks".
    tracks_counted = False
    if use_tracks:
        try:
            from triangulation import predict_wireframe_tracks
            track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
            diag["track_v"] = len(track_v) if track_v is not None else 0
            diag["track_e"] = len(track_e) if track_e is not None else 0
            tracks_counted = True
            pred_v, pred_e = script.hybrid_merge(
                pred_v, pred_e, track_v, track_e, merge_radius=0.8)
        except Exception as e:
            diag["status"] = f"track_failed:{type(e).__name__}"

    # ---- loose-track supplement -------------------------------------------
    if getattr(predict_one, "_tri_supplement", False):
        try:
            from triangulate_supplement import supplement_sparse_with_loose_tracks
            v_before = _safe_len(pred_v)
            pred_v, pred_e = supplement_sparse_with_loose_tracks(
                pred_v, pred_e, sample,
                sparse_threshold=getattr(predict_one, "_tri_sparse_threshold", 5),
                merge_radius=getattr(predict_one, "_tri_merge_radius", 0.7),
            )
            diag["tri_added"] = _safe_len(pred_v) - v_before
        except Exception as e:
            diag["status"] = f"tri_failed:{type(e).__name__}"

    # Edge count going into the refine/filter stages.
    diag["2dfilt_in"] = _safe_len(pred_e)

    # ---- vertex view-projection refinement --------------------------------
    if vertex_refine:
        try:
            from vertex_refine import refine_vertices_view_projection
            pv_before = pred_v
            pred_v, pred_e = refine_vertices_view_projection(
                pred_v, pred_e, sample,
                **(refine_kwargs or {}))
            # The moved-vertex count is only meaningful when the vertex
            # count is unchanged, so rows can be compared one to one.
            if hasattr(pred_v, '__len__') and len(pred_v) == len(pv_before):
                moved = int(np.sum(np.linalg.norm(
                    np.asarray(pred_v) - np.asarray(pv_before), axis=1) > 1e-6))
                diag["refined"] = moved
        except Exception as e:
            diag["status"] = f"refine_failed:{type(e).__name__}"

    # ---- exactly one of the edge-filter variants --------------------------
    if orphan_only:
        try:
            from edge_2d_filter import drop_orphan_vertices
            pred_v, pred_e = drop_orphan_vertices(pred_v, pred_e)
        except Exception as e:
            diag["status"] = f"orphan_failed:{type(e).__name__}"
    elif strict_no_support:
        try:
            from edge_2d_filter import filter_edges_strict_no_support
            pred_v, pred_e = filter_edges_strict_no_support(
                pred_v, pred_e, sample,
                max_support_thresh=0.10, dilate_px=4, sample_steps=20)
        except Exception as e:
            diag["status"] = f"strict_failed:{type(e).__name__}"
    elif use_2d_filter:
        try:
            from edge_2d_filter import filter_edges_by_2d_support
            pred_v, pred_e = filter_edges_by_2d_support(
                pred_v, pred_e, sample,
                min_views_support=2, min_pixel_frac=0.25,
                dilate_px=4, sample_steps=20)
        except Exception as e:
            diag["status"] = f"2dfilt_failed:{type(e).__name__}"
    diag["2dfilt_out"] = _safe_len(pred_e)

    # ---- learned vertex regressor -----------------------------------------
    vr = getattr(predict_one, "_vertex_regressor", None)
    if vr is not None:
        try:
            from vertex_regressor_v4 import refine_vertices_with_regressor
            pred_v, pred_e = refine_vertices_with_regressor(
                pred_v, pred_e, sample,
                vr["model"], vr["dino"], device=vr["dino_device"],
                feature_mean=vr["mean"], feature_std=vr["std"],
                edge_feat_mean=vr["edge_feat_mean"], edge_feat_std=vr["edge_feat_std"],
                max_move_meters=vr["max_move_meters"],
            )
        except Exception as e:
            diag["status"] = f"vr_failed:{type(e).__name__}"

    # ---- learned vertex classifier ----------------------------------------
    vc = getattr(predict_one, "_vertex_classifier", None)
    if vc is not None:
        try:
            from vertex_classifier_v4 import classify_vertices_v4
            v_before = _safe_len(pred_v)
            pred_v, pred_e = classify_vertices_v4(
                pred_v, pred_e, sample,
                vc["model"], vc["dino"], device=vc["dino_device"],
                threshold=vc["threshold"],
                feature_mean=vc["mean"], feature_std=vc["std"],
                edge_feat_mean=vc["edge_feat_mean"], edge_feat_std=vc["edge_feat_std"],
                min_keep_frac=vc["min_keep_frac"],
            )
            diag["vc_kept"] = _safe_len(pred_v)
            diag["vc_dropped"] = v_before - diag["vc_kept"]
        except Exception as e:
            diag["status"] = f"vc_failed:{type(e).__name__}"

    # ---- learned edge classifier (v1..v4) ---------------------------------
    ec = getattr(predict_one, "_edge_classifier", None)
    if ec is not None:
        try:
            e_before = _safe_len(pred_e)
            ver = ec.get("version", 1)
            if ver == 4:
                # v4 additionally needs the DINO model and edge-feature stats.
                from edge_classifier_v4 import classify_edges_v4
                pred_v, pred_e = classify_edges_v4(
                    pred_v, pred_e, sample,
                    ec["model"], ec["dino"], device=ec["dino_device"],
                    threshold=ec["threshold"],
                    feature_mean=ec["mean"], feature_std=ec["std"],
                    edge_feat_mean=ec["edge_feat_mean"], edge_feat_std=ec["edge_feat_std"],
                    min_keep_frac=ec["min_keep_frac"],
                )
            else:
                if ver == 3:
                    from edge_classifier_v3 import classify_edges_v3 as _cls_fn
                elif ver == 2:
                    from edge_classifier_v2 import classify_edges_v2 as _cls_fn
                else:
                    from edge_classifier import classify_edges as _cls_fn
                pred_v, pred_e = _cls_fn(
                    pred_v, pred_e, sample,
                    ec["model"], threshold=ec["threshold"],
                    feature_mean=ec["mean"], feature_std=ec["std"],
                    min_keep_frac=ec["min_keep_frac"],
                )
            diag["ec_kept"] = _safe_len(pred_e)
            diag["ec_dropped"] = e_before - diag["ec_kept"]
        except Exception as e:
            diag["status"] = f"ec_failed:{type(e).__name__}"

    # ---- edge fill from 2D evidence ---------------------------------------
    if edge_fill:
        e_before = _safe_len(pred_e)
        try:
            from edge_fill import fill_missing_edges_from_2d
            pred_v, pred_e = fill_missing_edges_from_2d(
                pred_v, pred_e, sample,
                **(fill_kwargs or {}))
            diag["filled"] = _safe_len(pred_e) - e_before
        except Exception as e:
            diag["status"] = f"fill_failed:{type(e).__name__}"

    # ---- wireframe bundle adjustment --------------------------------------
    if getattr(predict_one, "_bundle_adjust", False):
        try:
            from bundle_wireframe import bundle_adjust_wireframe
            ba_kwargs = getattr(predict_one, "_ba_kwargs", {})
            pred_v, pred_e = bundle_adjust_wireframe(
                pred_v, pred_e, sample, **ba_kwargs)
        except Exception as e:
            diag["status"] = f"ba_failed:{type(e).__name__}"

    # ---- hallucination filter ---------------------------------------------
    if getattr(predict_one, "_hallu_filter", False):
        try:
            from hallucination_filter import filter_hallucinated_vertices
            n_before = _safe_len(pred_v)
            pred_v, pred_e = filter_hallucinated_vertices(
                pred_v, pred_e, sample,
                colmap_radius=getattr(predict_one, "_hallu_colmap_radius", 0.8),
                gestalt_radius_px=getattr(predict_one, "_hallu_gestalt_px", 20.0),
                min_views_with_gestalt=getattr(predict_one, "_hallu_min_views", 1),
            )
            diag["hallu_dropped"] = n_before - _safe_len(pred_v)
        except Exception as e:
            diag["status"] = f"hallu_failed:{type(e).__name__}"

    # ---- tracks-only / fallback override ----------------------------------
    if tracks_only:
        try:
            from triangulation import predict_wireframe_tracks
            track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
            if _usable_tracks(track_v, track_e):
                pred_v = np.asarray(track_v, dtype=np.float32)
                pred_e = list(track_e)
                diag["status"] = "tracks_only_forced"
        except Exception as e:
            # Surface the failure: otherwise a "tracks only" run would report
            # "ok" while actually scoring the model prediction.
            diag["status"] = f"tracks_only_failed:{type(e).__name__}"
    elif fallback_tracks is not None:
        pred_v_thresh, track_v_thresh = fallback_tracks
        # When the track stage was skipped or crashed, diag["track_v"] is just
        # its initial 0, so defer the "few tracks" test to the fresh count.
        maybe_few_tracks = (not tracks_counted
                            or diag.get("track_v", 0) < track_v_thresh)
        if _safe_len(pred_v) > pred_v_thresh and maybe_few_tracks:
            try:
                from triangulation import predict_wireframe_tracks
                track_v, track_e = predict_wireframe_tracks(sample, min_views=3)
                n_track_v = len(track_v) if track_v is not None else 0
                if n_track_v < track_v_thresh and _usable_tracks(track_v, track_e):
                    pred_v = np.asarray(track_v, dtype=np.float32)
                    pred_e = list(track_e)
                    diag["status"] = "fallback_to_tracks"
            except Exception as e:
                diag["status"] = f"fallback_failed:{type(e).__name__}"

    diag["pred_v"] = _safe_len(pred_v)
    diag["pred_e"] = _safe_len(pred_e)
    return pred_v, pred_e, diag
|
|
|
|
def _to_numpy(stat):
    """Return ``stat.cpu().numpy()`` when *stat* has ``.cpu`` (e.g. a tensor), else *stat*."""
    return stat.cpu().numpy() if hasattr(stat, "cpu") else stat


def _edge_classifier_cfg(clf_model, mean, std, args, version):
    """Build the config dict ``predict_one`` expects for edge classifiers v1-v3."""
    return {
        "model": clf_model,
        "mean": _to_numpy(mean),
        "std": _to_numpy(std),
        "threshold": args.edge_class_thresh,
        "min_keep_frac": args.edge_class_min_keep,
        "version": version,
    }


def main():
    """Evaluate the pipeline on the first N trainval samples and report HSS.

    Steps: parse the CLI, load the checkpoint (downloading it when missing or
    smaller than 1000 bytes), load any optional ensemble / classifier /
    regressor models, configure ``predict_one`` through function attributes,
    stream up to ``n_samples`` samples, score each prediction with ``hss``,
    print one diagnostic line per sample plus summary statistics, and write
    the per-sample diagnostics to ``local_eval_<label>.json`` next to this
    script.

    Samples without ground truth are skipped. A sample whose prediction or
    scoring raises is reported with a traceback and left out of the scores.

    Raises:
        SystemExit: if ``--tta-seeds`` or ``--fallback-to-tracks-when`` is
            malformed. These are validated once up front so a typo fails
            immediately instead of crashing every sample.
    """
    import itertools
    import json
    import traceback

    args = parse_args()

    print(f"=== Local eval | {args.n_samples} samples | "
          f"tracks={'on' if not args.no_tracks else 'OFF'} | "
          f"2dfilt={'on' if not args.no_filter else 'OFF'} | "
          f"label={args.label} ===")

    # Validate the string-encoded options before any expensive loading.
    try:
        tta_seeds_tuple = tuple(int(s) for s in args.tta_seeds.split(","))
    except ValueError:
        raise SystemExit(
            f"--tta-seeds must be comma-separated integers, got {args.tta_seeds!r}"
        ) from None
    fallback_tracks = None
    if args.fallback_to_tracks_when:
        try:
            pv_thr, tv_thr = args.fallback_to_tracks_when.split(",")
            fallback_tracks = (int(pv_thr), int(tv_thr))
        except ValueError:
            raise SystemExit(
                "--fallback-to-tracks-when must look like 'X,Y' with integer "
                f"X and Y, got {args.fallback_to_tracks_when!r}"
            ) from None

    mps_available = torch.backends.mps.is_available()
    device = torch.device("mps" if mps_available else "cpu")
    print(f"Device: {device}")

    ckpt_path = SCRIPT_DIR / "checkpoint.pt"
    if not ckpt_path.exists() or ckpt_path.stat().st_size < 1000:
        import urllib.request
        url = ("https://huggingface.co/jacklangerman/s23dr-2026-submission/"
               "resolve/main/checkpoint.pt")
        print("Downloading checkpoint.pt ...")
        # Download to a side file and rename, so an interrupted transfer
        # cannot leave a truncated checkpoint.pt that passes the size check.
        tmp_path = ckpt_path.with_name(ckpt_path.name + ".part")
        urllib.request.urlretrieve(url, str(tmp_path))
        tmp_path.replace(ckpt_path)

    model = script.load_model(ckpt_path, device)
    print(f"Model: {sum(p.numel() for p in model.parameters()):,} params")

    ensemble_models = None
    if args.ensemble:
        from ensemble import load_two_checkpoints
        extra_paths = [p.strip() for p in args.ensemble.split(",") if p.strip()]
        ensemble_models = [model] + load_two_checkpoints(extra_paths, device)
        print(f"Ensemble: {len(ensemble_models)} models")

    # ---- optional learned models ------------------------------------------
    # Edge classifiers are loaded in order v1, v2, v3, (v4 below); when
    # several flags are given the last one loaded is the one used.
    edge_classifier = None
    vertex_regressor = None
    vertex_classifier = None

    if args.edge_classifier:
        from edge_classifier import load_classifier
        ec_model, ec_mean, ec_std = load_classifier(args.edge_classifier, device="cpu")
        print(f"Edge classifier loaded from {args.edge_classifier} "
              f"(thresh={args.edge_class_thresh}, min_keep={args.edge_class_min_keep})")
        edge_classifier = _edge_classifier_cfg(ec_model, ec_mean, ec_std, args, 1)

    if args.edge_classifier_v2:
        from edge_classifier_v2 import load_classifier_v2
        ec_model, ec_mean, ec_std = load_classifier_v2(args.edge_classifier_v2, device="cpu")
        print(f"Edge classifier V2 loaded from {args.edge_classifier_v2}")
        edge_classifier = _edge_classifier_cfg(ec_model, ec_mean, ec_std, args, 2)

    if args.edge_classifier_v3:
        from edge_classifier_v3 import load_classifier_v3
        ec_model, ec_mean, ec_std = load_classifier_v3(args.edge_classifier_v3, device="cpu")
        print(f"Edge classifier V3 (CNN) loaded from {args.edge_classifier_v3}")
        edge_classifier = _edge_classifier_cfg(ec_model, ec_mean, ec_std, args, 3)

    if args.vertex_regressor_v4:
        from vertex_regressor_v4 import load_regressor_v4
        from edge_classifier_v4 import get_dino_model
        vr_model, vrg_mean, vrg_std, vre_mean, vre_std = load_regressor_v4(
            args.vertex_regressor_v4, device="cpu")
        dino_vr = get_dino_model(device=device)
        print(f"Vertex regressor V4 loaded from {args.vertex_regressor_v4}")
        vertex_regressor = {
            "model": vr_model,
            "dino": dino_vr,
            "dino_device": device,
            "mean": _to_numpy(vrg_mean),
            "std": _to_numpy(vrg_std),
            "edge_feat_mean": _to_numpy(vre_mean),
            "edge_feat_std": _to_numpy(vre_std),
            "max_move_meters": args.vertex_reg_max_move,
        }

    if args.vertex_classifier_v4:
        from vertex_classifier_v4 import load_classifier_v4 as load_vc4
        from edge_classifier_v4 import get_dino_model
        vc_model, vg_mean, vg_std, ve_mean, ve_std = load_vc4(
            args.vertex_classifier_v4, device="cpu")
        dino_v = get_dino_model(device=device)
        print(f"Vertex classifier V4 loaded from {args.vertex_classifier_v4}")
        vertex_classifier = {
            "model": vc_model,
            "dino": dino_v,
            "dino_device": device,
            "mean": _to_numpy(vg_mean),
            "std": _to_numpy(vg_std),
            "edge_feat_mean": _to_numpy(ve_mean),
            "edge_feat_std": _to_numpy(ve_std),
            "threshold": args.vertex_class_thresh,
            "min_keep_frac": args.vertex_class_min_keep,
        }

    if args.edge_classifier_v4:
        from edge_classifier_v4 import load_classifier_v4, get_dino_model
        ec_model, g_mean, g_std, e_mean, e_std = load_classifier_v4(
            args.edge_classifier_v4, device="cpu")
        dino = get_dino_model(device=device)
        print(f"Edge classifier V4 (DINOv2) loaded from {args.edge_classifier_v4}")
        edge_classifier = {
            "model": ec_model,
            "dino": dino,
            "dino_device": device,
            "mean": _to_numpy(g_mean),
            "std": _to_numpy(g_std),
            "edge_feat_mean": _to_numpy(e_mean),
            "edge_feat_std": _to_numpy(e_std),
            "threshold": args.edge_class_thresh,
            "min_keep_frac": args.edge_class_min_keep,
            "version": 4,
        }

    # ---- runtime overrides of the submission script -----------------------
    if args.conf_thresh is not None:
        print(f"Overriding script.CONF_THRESH: {script.CONF_THRESH} -> {args.conf_thresh}")
        script.CONF_THRESH = args.conf_thresh

    if args.snap_apex:
        print("Monkey-patching snap_to_point_cloud to include apex (class 0)")
        from s23dr_2026_example import postprocess_v2 as _pp
        _orig_snap = _pp.snap_to_point_cloud

        def _snap_with_apex(vertices, xyz, class_id, snap_radius=0.5, target_classes=None):
            return _orig_snap(vertices, xyz, class_id, snap_radius=snap_radius,
                              target_classes=target_classes or [0, 1, 2])

        _pp.snap_to_point_cloud = _snap_with_apex
        script.snap_to_point_cloud = _snap_with_apex

    ds = load_dataset(
        'usm3d/hoho22k_2026_trainval', split='train',
        streaming=True, trust_remote_code=True)

    cfg = FuserConfig()
    rng = np.random.RandomState(args.seed)

    # ---- per-run configuration of predict_one -----------------------------
    # None of this depends on the sample, so it is set once up front. The
    # model attributes are always assigned (possibly to None) so a previous
    # main() call in the same process cannot leak its models into this run.
    refine_kwargs = {
        "max_pixel_dist": args.refine_max_pixel_dist,
        "min_views": args.refine_min_views,
        "max_move_meters": args.refine_max_move,
    }
    fill_kwargs = {
        "min_views_support": args.fill_min_views,
        "min_pixel_frac": args.fill_min_frac,
        "max_edge_length_meters": args.fill_max_length,
    }
    predict_one._tta_hungarian = args.tta_hungarian
    predict_one._tta_min_passes = args.tta_min_passes
    predict_one._hallu_filter = args.hallu_filter
    predict_one._hallu_colmap_radius = args.hallu_colmap_radius
    predict_one._hallu_gestalt_px = args.hallu_gestalt_px
    predict_one._hallu_min_views = args.hallu_min_views
    predict_one._ensemble_models = ensemble_models
    predict_one._ensemble_min_passes = args.ensemble_min_passes
    predict_one._bundle_adjust = args.bundle_adjust
    predict_one._ba_kwargs = {
        "n_iter": args.ba_iter,
        "lr": args.ba_lr,
        "vertex_weight": args.ba_vertex_weight,
        "edge_weight": args.ba_edge_weight,
        "anchor_weight": args.ba_anchor_weight,
        "max_move_meters": args.ba_max_move,
    }
    predict_one._tri_supplement = args.tri_supplement
    predict_one._tri_sparse_threshold = args.tri_sparse_threshold
    predict_one._tri_merge_radius = args.tri_merge_radius
    predict_one._edge_classifier = edge_classifier
    predict_one._vertex_classifier = vertex_classifier
    predict_one._vertex_regressor = vertex_regressor

    scores = []
    diags = []
    t_start = time.time()

    # islice stops after n_samples without pulling one extra sample from the
    # stream; max() keeps a negative count equivalent to "evaluate nothing".
    for idx, sample in enumerate(itertools.islice(ds, max(args.n_samples, 0))):
        order_id = sample.get('order_id', str(idx))
        gt_v = sample.get('wf_vertices')
        gt_e = sample.get('wf_edges')
        if gt_v is None or gt_e is None:
            print(f"[{idx}] {order_id}: SKIP (no GT)")
            continue

        try:
            pred_v, pred_e, diag = predict_one(
                sample, model, device, cfg, rng,
                use_tracks=not args.no_tracks,
                use_2d_filter=not args.no_filter,
                orphan_only=args.orphan_only,
                strict_no_support=args.strict_no_support,
                vertex_refine=args.vertex_refine,
                refine_kwargs=refine_kwargs,
                edge_fill=args.edge_fill,
                fill_kwargs=fill_kwargs,
                tta=args.tta,
                tta_seeds=tta_seeds_tuple,
                tracks_only=args.tracks_only,
                fallback_tracks=fallback_tracks)
            if mps_available:
                torch.mps.empty_cache()

            res = hss(np.asarray(pred_v), pred_e, np.asarray(gt_v), gt_e)
            # hss may return an object exposing .hss or a bare number.
            score = float(res.hss) if hasattr(res, 'hss') else float(res)
            scores.append(score)
            diags.append({"order_id": order_id, "score": score, **diag})

            print(f"[{idx:3d}] {order_id} hss={score:.4f} "
                  f"colmap={diag['colmap']} fused={diag['fused']} "
                  f"track={diag['track_v']}/{diag['track_e']} "
                  f"pred={diag['pred_v']}/{diag['pred_e']} "
                  f"2dfilt={diag['2dfilt_in']}->{diag['2dfilt_out']} "
                  f"{diag['status']}")
        except Exception as e:
            # Keep going: one bad sample should not abort the whole run.
            print(f"[{idx}] {order_id} EVAL CRASH: {e}")
            traceback.print_exc()

    elapsed = time.time() - t_start
    scores = np.array(scores)
    if len(scores) == 0:
        print("\nNo valid scores.")
        return
    print(f"\n=== {args.label} | {len(scores)}/{args.n_samples} samples | "
          f"{elapsed:.0f}s ({elapsed/max(len(scores),1):.1f}s/sample) ===")
    print(f" hss_mean = {scores.mean():.4f}")
    print(f" hss_q5 = {np.quantile(scores, 0.05):.4f}")
    print(f" hss_q25 = {np.quantile(scores, 0.25):.4f}")
    print(f" hss_q50 = {np.quantile(scores, 0.50):.4f}")
    print(f" hss_q75 = {np.quantile(scores, 0.75):.4f}")
    print(f" hss_q95 = {np.quantile(scores, 0.95):.4f}")
    print(f" hss_min = {scores.min():.4f} hss_max = {scores.max():.4f}")

    out_path = SCRIPT_DIR / f"local_eval_{args.label}.json"
    with out_path.open("w", encoding="utf-8") as f:
        json.dump(diags, f, indent=2)
    print(f" Per-sample details: {out_path}")
|
|
|
|
# Script entry point: run the evaluation only when executed directly.
# main() returns None, so this exits with status 0 on normal completion.
if __name__ == "__main__":
    raise SystemExit(main())
|
|