| """CLI bootstrap chat benchmark runner.""" |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import asyncio |
| import json |
| import sys |
| from pathlib import Path |
|
|
# Directory one level above this script's own directory; it is put at the
# front of sys.path (once) so the project-local imports below resolve when the
# script is executed directly.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
|
|
| from maris_core.text.benchmark import ( |
| load_chat_benchmark_dataset, |
| run_chat_benchmark, |
| summarize_chat_benchmark, |
| ) |
|
|
|
|
async def _main(argv: list[str] | None = None) -> int:
    """Run the chat benchmark from the command line and report its summary.

    Parses the CLI arguments, loads the benchmark cases from the JSON dataset,
    runs them against the configured text endpoint, and renders the summary as
    indented JSON. The rendered summary is written to ``--output`` when that
    option is given; otherwise it is printed to stdout.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the regular command-line behaviour.

    Returns:
        ``0`` when the summary's ``failed_cases`` entry equals zero,
        ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Palaiž Maris chat benchmark runner ar JSON eval datasetu"
    )
    parser.add_argument(
        "dataset",
        nargs="?",
        default=PROJECT_ROOT / "evals" / "chat_eval_dataset.json",
        type=Path,
        help="JSON eval dataset fails",
    )
    parser.add_argument(
        "--url",
        default="http://localhost:8000/v1/text/generate",
        help="Pilns core-python text endpoint URL",
    )
    parser.add_argument(
        "--concurrency", type=int, default=1, help="Vienlaicīgo requestu skaits"
    )
    parser.add_argument(
        "--timeout-seconds", type=float, default=120.0, help="HTTP timeout sekundēs"
    )
    parser.add_argument("--output", type=Path, help="Kur saglabāt JSON rezultātu")
    # parse_args(None) falls back to sys.argv[1:], so the default call is unchanged.
    args = parser.parse_args(argv)

    cases = load_chat_benchmark_dataset(args.dataset)
    results = await run_chat_benchmark(
        cases,
        url=args.url,
        concurrency=args.concurrency,
        timeout_seconds=args.timeout_seconds,
    )
    summary = summarize_chat_benchmark(results)
    rendered = json.dumps(summary, indent=2, ensure_ascii=False)

    if args.output:
        # Create missing parent directories first: otherwise write_text raises
        # FileNotFoundError and the results of the finished run are lost.
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(rendered + "\n", encoding="utf-8")
    else:
        print(rendered)
    return 0 if summary["failed_cases"] == 0 else 1
|
|
|
|
if __name__ == "__main__":
    # Drive the async entry point to completion and propagate its return
    # value as the process exit status.
    exit_code = asyncio.run(_main())
    raise SystemExit(exit_code)
|
|