"""Model Court benchmark: sequential vs. parallel first-wave Role Agent execution."""

from __future__ import annotations

import argparse
import statistics
import sys
from pathlib import Path

# Put this file's grandparent directory at the front of sys.path before the
# `core` imports below (presumably the project root that contains `core`).
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.court import run_model_court_benchmark
from core.court_client import MockCourtClient, VLLMCourtClient

# Fixed claim description used as the benchmark input; main() passes it as
# `case_text` on every run.
CASE = """\
Claim title: Escalation case with disputed slip-and-fall evidence
Claim amount: $50,000
The claimant slipped on a wet floor. The policy was active. No witnesses.
Store manager says no camera. Prior claim two years ago. Medical bills submitted.
"""


def main(argv: list[str] | None = None) -> None:
    """Run the Model Court benchmark from the command line and print a report.

    Each run builds a fresh client (mock or vLLM), calls
    ``run_model_court_benchmark`` on the built-in ``CASE`` text, and prints
    the sequential and parallel timings with both speedup ratios. After the
    last run the mean speedups across all runs are printed.

    Args:
        argv: Command-line arguments to parse, without the program name.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: If argument parsing fails, including ``--runs`` below 1.
        RuntimeError: If a run returns a result without a benchmark artifact.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mock",
        action="store_true",
        help="use MockCourtClient instead of VLLMCourtClient",
    )
    parser.add_argument(
        "--endpoint",
        default="http://localhost:8000/v1",
        help="endpoint URL passed to VLLMCourtClient (ignored with --mock)",
    )
    parser.add_argument(
        "--model",
        default="Qwen/Qwen2.5-14B-Instruct",
        help="model name passed to the client and recorded in the benchmark",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=1,
        help="number of benchmark runs to average over (must be >= 1)",
    )
    args = parser.parse_args(argv)

    # With zero (or negative) runs the loop below never executes and
    # statistics.mean() would raise StatisticsError on the empty lists, so
    # reject the value up front with a normal usage error.
    if args.runs < 1:
        parser.error(f"--runs must be at least 1 (got {args.runs})")

    mode = "mock" if args.mock else "vllm"
    first_wave_speedups: list[float] = []
    full_speedups: list[float] = []

    print("\nModel Court Benchmark")
    print("=" * 60)
    print(f"Model: {args.model}")
    print(f"Endpoint mode: {mode}")
    for index in range(args.runs):
        # A new client is constructed for every run, as in the original flow.
        client = MockCourtClient() if args.mock else VLLMCourtClient(args.endpoint, args.model)
        result = run_model_court_benchmark(
            case_text=CASE,
            client=client,
            model_name=args.model,
            endpoint_mode=mode,
            case_title="Benchmark case",
        )
        benchmark = result.benchmark
        if benchmark is None:
            raise RuntimeError("Benchmark artifact was not produced.")
        first_wave_speedups.append(benchmark.first_wave_speedup)
        full_speedups.append(benchmark.full_tribunal_speedup)
        print(
            f"Run {index + 1}: sequential={benchmark.sequential.total_seconds:.3f}s "
            f"parallel={benchmark.parallel.total_seconds:.3f}s "
            f"first-wave={benchmark.first_wave_speedup:.2f}x "
            f"full={benchmark.full_tribunal_speedup:.2f}x"
        )

    print("-" * 60)
    print(f"Mean first-wave speedup: {statistics.mean(first_wave_speedups):.2f}x")
    print(f"Mean full Tribunal speedup: {statistics.mean(full_speedups):.2f}x")
    if args.mock:
        print("Mock mode measures orchestration overhead only, not GPU batching.")


# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()