| """Model Court benchmark: sequential vs. parallel first-wave Role Agent execution.""" |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import statistics |
| import sys |
| from pathlib import Path |
|
|
| sys.path.insert(0, str(Path(__file__).parent.parent)) |
|
|
| from core.court import run_model_court_benchmark |
| from core.court_client import MockCourtClient, VLLMCourtClient |
|
|
| CASE = """\ |
| Claim title: Escalation case with disputed slip-and-fall evidence |
| Claim amount: $50,000 |
| The claimant slipped on a wet floor. The policy was active. No witnesses. |
| Store manager says no camera. Prior claim two years ago. Medical bills submitted. |
| """ |
|
|
|
|
| def main() -> None: |
| parser = argparse.ArgumentParser() |
| parser.add_argument("--mock", action="store_true") |
| parser.add_argument("--endpoint", default="http://localhost:8000/v1") |
| parser.add_argument("--model", default="Qwen/Qwen2.5-14B-Instruct") |
| parser.add_argument("--runs", type=int, default=1) |
| args = parser.parse_args() |
|
|
| mode = "mock" if args.mock else "vllm" |
| first_wave_speedups: list[float] = [] |
| full_speedups: list[float] = [] |
|
|
| print("\nModel Court Benchmark") |
| print("=" * 60) |
| print(f"Model: {args.model}") |
| print(f"Endpoint mode: {mode}") |
| for index in range(args.runs): |
| client = MockCourtClient() if args.mock else VLLMCourtClient(args.endpoint, args.model) |
| result = run_model_court_benchmark( |
| case_text=CASE, |
| client=client, |
| model_name=args.model, |
| endpoint_mode=mode, |
| case_title="Benchmark case", |
| ) |
| benchmark = result.benchmark |
| if benchmark is None: |
| raise RuntimeError("Benchmark artifact was not produced.") |
| first_wave_speedups.append(benchmark.first_wave_speedup) |
| full_speedups.append(benchmark.full_tribunal_speedup) |
| print( |
| f"Run {index + 1}: sequential={benchmark.sequential.total_seconds:.3f}s " |
| f"parallel={benchmark.parallel.total_seconds:.3f}s " |
| f"first-wave={benchmark.first_wave_speedup:.2f}x " |
| f"full={benchmark.full_tribunal_speedup:.2f}x" |
| ) |
|
|
| print("-" * 60) |
| print(f"Mean first-wave speedup: {statistics.mean(first_wave_speedups):.2f}x") |
| print(f"Mean full Tribunal speedup: {statistics.mean(full_speedups):.2f}x") |
| if args.mock: |
| print("Mock mode measures orchestration overhead only, not GPU batching.") |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|