"""WS1 deliverable: precision-coverage curve for the verified planner on real errors.

Sweeps the verifier threshold tau over the hospital benchmark and reports, per tau:
  precision  = repair_prec  (of the cells we changed, how many match gold)
  coverage   = repair_recall (of the real errors, how many we fixed)

GATE (publication plan): precision >= 0.70 at coverage >= 0.30. The verified planner
abstains on low-confidence merges instead of committing them — selective prediction at
the plan level, contract-preserving (dropped entries become review flags).

    uv run python -m eval.precision_curve                 # grounded heuristic planner
    uv run python -m eval.precision_curve --plan plan.json # pre-captured model plan
    uv run python -m eval.precision_curve --plan plan.json --union  # production pipeline
"""

from __future__ import annotations

import argparse
import json

from scrubdata.executor import apply_plan
from scrubdata.planner import mock_plan
from scrubdata.verifier import union_plans, verify_plan

from .run_real import _ensure_data, _load
from .run_real_multi import score as _cn_score          # churn-neutral scoring

# Verifier thresholds swept by curve(); each value is passed to verify_plan as `tau`.
TAUS = [0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]


def _repairs_only(plan: dict) -> dict:
    """Keep only the REPAIR decisions (canonicalize mappings); drop format/table ops.
    This is the Baran-comparable protocol: precision over error-repair decisions,
    not over convention standardization (dates->ISO etc., which the raw benchmark
    stores as text and would flood the denominator)."""
    import copy
    out = copy.deepcopy(plan)
    out["table_operations"] = []
    for c in out.get("columns", []):
        c["operations"] = [o for o in c.get("operations", [])
                           if o.get("op") == "canonicalize_categories"]
    out["columns"] = [c for c in out.get("columns", []) if c.get("operations")]
    return out


def curve(dirty, clean, base_plan: dict, label: str, union: bool = False, *,
          min_precision: float = 0.70, min_coverage: float = 0.30) -> list[dict]:
    """Sweep the verifier threshold over ``TAUS`` and print a precision-coverage table.

    For every tau the base plan is filtered by ``verify_plan``, optionally
    unioned with the ``mock_plan`` heuristic, reduced to repair decisions by
    ``_repairs_only``, applied with ``apply_plan`` and scored with the
    churn-neutral scorer. One table line is printed per tau, followed by a
    GATE verdict.

    Args:
        dirty: the dirty data (as returned by ``_load()``); forwarded to the
            planner, verifier, executor and scorer.
        clean: the gold data (as returned by ``_load()``); forwarded to the scorer.
        base_plan: the raw plan to verify at each threshold.
        label: free text shown in the table header.
        union: when true, each verified plan is unioned with the heuristic
            plan computed once from ``dirty``.
        min_precision: precision a row must reach to clear the gate.
        min_coverage: coverage a row must reach to clear the gate.

    Returns:
        One dict per tau with keys ``tau``, ``precision``, ``coverage``,
        ``changed`` and ``fixed``, in sweep order.
    """

    def clears_gate(row: dict) -> bool:
        # Single definition of the gate so the per-row marker and the final
        # verdict can never disagree.
        return row["precision"] >= min_precision and row["coverage"] >= min_coverage

    rows = []
    # The heuristic plan does not depend on tau, so build it once up front.
    heuristic = mock_plan(dirty) if union else None
    print(f"\n=== precision-coverage: {label} (hospital, 509 real errors) ===")
    print(f"{'tau':>5}{'precision':>11}{'coverage':>10}{'changed':>9}{'fixed':>7}")
    print("-" * 44)
    for tau in TAUS:
        plan = verify_plan(dirty, base_plan, tau=tau)
        if union:                       # the production (active.py) composition
            plan = union_plans(plan, heuristic)
        plan = _repairs_only(plan)
        cleaned, _ = apply_plan(dirty, plan)
        m = _cn_score(dirty, clean, cleaned)
        row = {"tau": tau, "precision": m["precision"], "coverage": m["recall"],
               "changed": m["_changed"], "fixed": m["_fixed"]}
        rows.append(row)
        gate = "  <-- GATE" if clears_gate(row) else ""
        print(f"{tau:>5.2f}{m['precision']:>11.3f}{m['recall']:>10.3f}"
              f"{m['_changed']:>9}{m['_fixed']:>7}{gate}")

    ok = [r for r in rows if clears_gate(r)]
    if ok:
        # Among gate-clearing rows report the one with the highest coverage
        # (max() keeps the first, i.e. lowest-tau, row on ties).
        best = max(ok, key=lambda r: r["coverage"])
        print(f"\nGATE: PASS at tau={best['tau']} (precision {best['precision']:.3f}, "
              f"coverage {best['coverage']:.3f})")
    else:
        hi = max(rows, key=lambda r: r["precision"])
        print(f"\nGATE: not cleared — max precision {hi['precision']:.3f} at "
              f"coverage {hi['coverage']:.3f} (tau={hi['tau']})")
    return rows


def main() -> None:
    """Command-line entry point: build the base plan, run the sweep, optionally save it.

    With ``--plan`` the base plan is read from the given JSON file; otherwise it
    is produced by ``mock_plan`` on the dirty data. ``--union`` is forwarded to
    ``curve``. With ``--out`` the returned curve rows are written as JSON.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--plan", type=str, default=None,
                    help="path to a captured raw plan JSON (e.g. the v6 model's)")
    ap.add_argument("--union", action="store_true",
                    help="union each verified plan with the grounded heuristic "
                         "(the shipped active.py pipeline)")
    ap.add_argument("--out", type=str, default=None, help="write curve rows to JSON")
    args = ap.parse_args()

    _ensure_data()
    dirty, clean = _load()
    if args.plan:
        # Context manager so the handle is closed deterministically.
        with open(args.plan, encoding="utf-8") as plan_file:
            base_plan = json.load(plan_file)
        label = f"model plan ({args.plan})" + (" + heuristic union" if args.union else "")
    else:
        base_plan = mock_plan(dirty)
        label = "grounded heuristic"
    rows = curve(dirty, clean, base_plan, label, union=args.union)
    if args.out:
        # Closing the file explicitly guarantees the JSON is flushed to disk
        # before the "written" message is printed.
        with open(args.out, "w", encoding="utf-8") as out_file:
            json.dump(rows, out_file, indent=1)
        print(f"curve written to {args.out}")


# Run the sweep only when executed as a script (e.g. `python -m eval.precision_curve`),
# not when this module is imported.
if __name__ == "__main__":
    main()