#!/usr/bin/env python3
"""Portable benchmark matrix runner for the repo.

Runs a small reproducible subset of benchmark entrypoints and records
per-command logs plus a summary manifest. This is intended to answer:
which benchmark surfaces are healthy on this machine right now?
"""

from __future__ import annotations

import argparse
import json
import platform
import shlex
import subprocess
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Tuple

from training.core.runtime_contract import ExecutionMode
from training.core.unified_backend import backend_capability_report

ROOT = Path(__file__).resolve().parent.parent
DEFAULT_OUT = ROOT / "verification_runs"


@dataclass
class MatrixStep:
    """Outcome record for a single benchmark command in the matrix."""

    name: str                  # step identifier (also the log file stem)
    command: List[str]         # argv that was executed
    returncode: int            # subprocess exit status
    duration_s: float          # wall-clock runtime in seconds
    log_path: str              # log location, relative to ROOT when possible
    requested_backend: str     # backend implied by the step name
    used_backend: str          # backend actually selected after fallback
    execution_mode: str        # execution mode reported for that backend
    artifact_paths: List[str]  # result files the step is expected to produce


def _timestamp() -> str:
    """Return a filesystem-safe local timestamp (YYYYmmdd-HHMMSS)."""
    return time.strftime("%Y%m%d-%H%M%S")


def _step_backend(name: str) -> str:
    """Infer the requested backend from a step name.

    Any step whose name contains "max" targets the MAX backend; every
    other step targets CPU.
    """
    return "max" if "max" in name else "cpu"


def _rel_to_root(path: Path) -> str:
    """Render *path* relative to ROOT, falling back to the absolute form.

    Path.relative_to raises ValueError when *path* is not under ROOT
    (e.g. a user-supplied --out-dir elsewhere on disk); in that case the
    absolute path is recorded instead of crashing the run.
    """
    try:
        return str(path.relative_to(ROOT))
    except ValueError:
        return str(path)


def _run_step(name: str, command: List[str], run_dir: Path, artifacts: List[str]) -> MatrixStep:
    """Execute one benchmark command, capture its log, and record metadata.

    stdout and stderr are merged into a single per-step log file under
    *run_dir* so the log reads chronologically. Backend selection metadata
    is taken from the capability report so the manifest reflects what
    actually ran rather than what was requested.
    """
    log_path = run_dir / f"{name}.log"
    started = time.time()
    proc = subprocess.run(
        command,
        cwd=ROOT,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr into the same stream
        text=True,
    )
    duration = time.time() - started
    # Explicit encoding keeps log files portable across locales.
    log_path.write_text(proc.stdout, encoding="utf-8")
    capabilities = backend_capability_report()
    requested_backend = _step_backend(name)
    capability = capabilities.get(requested_backend, {})
    # When the requested backend is unavailable, fall back to the backend
    # the capability report suggests, defaulting to CPU.
    used_backend = requested_backend if capability.get("available") else str(
        capability.get("fallback_backend") or "cpu"
    )
    execution_mode = str(
        capability.get("execution_mode", ExecutionMode.CPU_FALLBACK.value)
    )
    return MatrixStep(
        name=name,
        command=command,
        returncode=proc.returncode,
        duration_s=duration,
        log_path=_rel_to_root(log_path),
        requested_backend=requested_backend,
        used_backend=used_backend,
        execution_mode=execution_mode,
        artifact_paths=artifacts,
    )
def _commands(py: str, max_samples: int, epochs: int) -> List[Tuple[str, List[str], List[str]]]: patience = "2" batch_size = "16" return [ ( "text2cypher_v4", [ py, "-m", "training.text2cypher_kan_v4", "--max-samples", str(max_samples), "--epochs", str(epochs), "--batch-size", batch_size, "--patience", patience, "--no-evolve", ], ["training/kan_bench_results/text2cypher_v4_results.json"], ), ( "spider2_v2", [ py, "-m", "training.spider2_kan_benchmark_v2", "--epochs", str(epochs), "--batch-size", batch_size, "--patience", patience, ], ["training/kan_bench_results/spider2_v2_results.json"], ), ( "unified_dialect", [ py, "-m", "training.unified_dialect_benchmark", "--epochs", str(epochs), "--batch-size", batch_size, "--patience", patience, "--max-hf", "0", ], ["training/kan_bench_results/unified_dialect_results.json"], ), ( "swebench_lite", [ py, "-m", "training.swebench_kan_benchmark", "--variant", "lite", "--split", "test", "--epochs", str(epochs), "--batch-size", batch_size, "--patience", patience, "--max-instances", str(max_samples), ], ["training/kan_bench_results/swebench_results.json"], ), ] def main() -> int: parser = argparse.ArgumentParser(description="Portable benchmark matrix runner") parser.add_argument("--max-samples", type=int, default=16) parser.add_argument("--epochs", type=int, default=1) parser.add_argument("--out-dir", default=str(DEFAULT_OUT)) parser.add_argument("--allow-failures", action="store_true") args = parser.parse_args() run_dir = Path(args.out_dir) / f"benchmark-matrix-{_timestamp()}" run_dir.mkdir(parents=True, exist_ok=True) py = sys.executable results: List[MatrixStep] = [] for name, command, artifacts in _commands(py, args.max_samples, args.epochs): print(f"[run] {name}: {shlex.join(command)}", flush=True) result = _run_step(name, command, run_dir, artifacts) print( f"[done] {name}: rc={result.returncode} backend={result.used_backend} " f"mode={result.execution_mode} time={result.duration_s:.2f}s log={result.log_path}", flush=True, ) 
results.append(result) if result.returncode != 0 and not args.allow_failures: break summary: Dict[str, object] = { "root": str(ROOT), "python": sys.executable, "python_version": sys.version, "platform": { "system": platform.system(), "release": platform.release(), "machine": platform.machine(), }, "config": { "max_samples": args.max_samples, "epochs": args.epochs, }, "capabilities": backend_capability_report(), "steps": [asdict(r) for r in results], "success": all(r.returncode == 0 for r in results) and len(results) == len(_commands(py, args.max_samples, args.epochs)), } summary_path = run_dir / "summary.json" summary_path.write_text(json.dumps(summary, indent=2)) print(f"[summary] {summary_path.relative_to(ROOT)}", flush=True) return 0 if summary["success"] or args.allow_failures else 1 if __name__ == "__main__": raise SystemExit(main())