"""OpenBMB MiniCPM gate check — drops a zero-shot MiniCPM planner into the IDENTICAL
hospital verify(tau=0.5)+union gate the scaling arm uses, and scores it against the
shipped Qwen3-4B fine-tune bar (union 0.905@0.413).

Reuses eval/scaling_arm.py wholesale (make_cloud_planner hits /api/chat, so a LOCAL
ollama model id works through the same contract — no cloud proxy involved here). Does
not modify scaling_arm's existing rows; writes eval/results/minicpm_check.json plus the
raw plan (scaling_minicpm3_4b_hospital_raw_plan.json) in the same results directory.

    uv run python -m eval.minicpm_check
"""

from __future__ import annotations

import json
import time
from pathlib import Path

from scrubdata.model_planner import make_batched_planner

from .run_real import _ensure_data, _load
from .scaling_arm import (PRIOR_4B_ROW, RESULTS, make_cloud_planner, score_point)

# Ollama model id handed to make_cloud_planner and recorded as "model" in the result row.
MODEL = "hf.co/openbmb/MiniCPM3-4B-GGUF:Q4_K_M"
# Parameter count in billions; recorded as "params_b" in the result row.
PARAMS_B = 4
# Family label; recorded as "family" in the result row and echoed in the banner.
FAMILY = "openbmb/minicpm3 (zero-shot, local)"
# Threshold passed to score_point as `tau` (the verify(tau=0.5) gate named in the
# module docstring).
TAU = 0.5


def main() -> None:
    """Run the zero-shot MiniCPM planner on the hospital data and record its gate scores.

    Steps, in order:

    1. Load the dirty/clean pair via ``_ensure_data`` / ``_load``.
    2. Build a planner with ``make_cloud_planner(MODEL, ...)``, wrap it with
       ``make_batched_planner`` and run it on the dirty data, timing the call.
    3. Write the raw plan to ``scaling_minicpm3_4b_hospital_raw_plan.json``
       under ``RESULTS``.
    4. If the planner returned a dict without an ``"__error__"`` key, score it
       with ``score_point(..., tau=TAU)``; otherwise fill the metric fields
       with ``None`` and attach a note.
    5. Write ``minicpm_check.json`` under ``RESULTS`` containing the
       ``PRIOR_4B_ROW`` bar and this run's row.

    A planner exception is caught on purpose so a results file is still
    produced: the message (truncated to 200 characters) is stored in the raw
    plan under ``"__error__"`` and echoed to stdout.
    """
    _ensure_data()
    dirty, clean = _load()

    # Make sure the output directory exists before the (potentially long)
    # planner run, so the run cannot be lost to a missing directory at write time.
    RESULTS.mkdir(parents=True, exist_ok=True)

    print(f"=== {MODEL} ({FAMILY}, {PARAMS_B}B) — hospital, 509 real errors ===",
          flush=True)
    # local model, served by the same daemon at localhost:11434 via /api/chat;
    # bigger num_predict so a chatty 4B isn't truncated mid-JSON
    num_predict = 4000
    # `stats` is read below as a mapping with "calls", "valid" and "errors" keys.
    raw, stats = make_cloud_planner(MODEL, num_predict=num_predict)
    batched = make_batched_planner(raw, batch_size=4)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments during the run.
    t0 = time.perf_counter()
    try:
        raw_plan = batched(dirty)
    except Exception as e:  # deliberate best-effort: record the failure, keep going
        raw_plan = {"__error__": str(e)[:200]}
        print(f"  planner failed: {raw_plan['__error__']}", flush=True)
    runtime = round(time.perf_counter() - t0, 1)
    # Guard the division: zero calls means validity is reported as 0.0.
    validity = round(stats["valid"] / stats["calls"], 3) if stats["calls"] else 0.0

    plan_path = RESULTS / "scaling_minicpm3_4b_hospital_raw_plan.json"
    # Context manager guarantees the handle is flushed and closed.
    with open(plan_path, "w", encoding="utf-8") as fh:
        json.dump(raw_plan, fh)

    row = {"model": MODEL, "params_b": PARAMS_B, "family": FAMILY,
           "validity": validity, "runtime_s": runtime,
           "n_calls": stats["calls"], "errors": stats["errors"][:5],
           # Built from MODEL / num_predict so the provenance cannot drift from
           # what was actually run.
           "provenance": f"this run (ZERO-SHOT, local RTX 3060 Ti via ollama "
                         f"{MODEL}, num_predict={num_predict})",
           # Stored relative to the directory two levels above RESULTS.
           "raw_plan": str(plan_path.relative_to(RESULTS.parent.parent))}
    if isinstance(raw_plan, dict) and "__error__" not in raw_plan:
        row.update(score_point(dirty, clean, raw_plan, tau=TAU))
    else:
        # Planner failed or returned a non-dict: keep the row shape, null the metrics.
        row.update({"gated_prec": None, "gated_cov": None,
                    "union_prec": None, "union_cov": None,
                    "verifier_kept": None, "verifier_dropped": None,
                    "note": "planner produced no plan"})

    print(f"  validity {validity}  runtime {runtime}s  "
          f"gated {row.get('gated_prec')}/{row.get('gated_cov')}  "
          f"union {row.get('union_prec')}/{row.get('union_cov')}  "
          f"kept/dropped {row.get('verifier_kept')}/{row.get('verifier_dropped')}",
          flush=True)

    bar = PRIOR_4B_ROW
    out = {
        "task": "OpenBMB MiniCPM gate check — hospital 509 real errors, tau=0.5, "
                "v6 gate protocol (batched raw plan -> verify -> union). "
                "ZERO-SHOT MiniCPM vs FINE-TUNED Qwen3-4B bar.",
        "bar": {"model": bar["model"], "union_prec": bar["union_prec"],
                "union_cov": bar["union_cov"], "gated_prec": bar["gated_prec"],
                "gated_cov": bar["gated_cov"]},
        # dict(bar) copies the imported row so the shared constant is not aliased.
        "rows": [dict(bar), row],
    }
    out_path = RESULTS / "minicpm_check.json"
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(out, fh, indent=1)
    print("\nwritten:", out_path)

# Run the check only when executed as a script/module
# (`uv run python -m eval.minicpm_check`), not when imported.
if __name__ == "__main__":
    main()