File size: 1,852 Bytes
"""CLI bootstrap chat benchmark runner."""
from __future__ import annotations
import argparse
import asyncio
import json
import sys
from pathlib import Path
# Directory two levels above this file (the parent of the folder holding the
# script). It is put at the front of sys.path, unless already listed, so the
# `maris_core` import that follows can be resolved when the script is run
# directly.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
from maris_core.text.benchmark import ( # noqa: E402
load_chat_benchmark_dataset,
run_chat_benchmark,
summarize_chat_benchmark,
)
async def _main() -> int:
    """Run the chat benchmark from the command line and emit its summary.

    Parses the CLI arguments, passes the dataset path to
    ``load_chat_benchmark_dataset``, awaits ``run_chat_benchmark`` with the
    chosen URL, concurrency and timeout, and renders the result of
    ``summarize_chat_benchmark`` as indented JSON (non-ASCII characters are
    kept as-is). The JSON is written to ``--output`` when that option is
    given, otherwise it is printed to stdout.

    Returns:
        ``0`` when ``summary["failed_cases"]`` equals zero, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Palaiž Maris chat benchmark runner ar JSON eval datasetu"
    )
    parser.add_argument(
        "dataset",
        nargs="?",
        default=PROJECT_ROOT / "evals" / "chat_eval_dataset.json",
        type=Path,
        help="JSON eval dataset fails",
    )
    parser.add_argument(
        "--url",
        default="http://localhost:8000/v1/text/generate",
        help="Pilns core-python text endpoint URL",
    )
    parser.add_argument(
        "--concurrency",
        type=int,
        default=1,
        help="Vienlaicīgo requestu skaits",
    )
    parser.add_argument(
        "--timeout-seconds",
        type=float,
        default=120.0,
        help="HTTP timeout sekundēs",
    )
    parser.add_argument("--output", type=Path, help="Kur saglabāt JSON rezultātu")
    args = parser.parse_args()

    cases = load_chat_benchmark_dataset(args.dataset)
    results = await run_chat_benchmark(
        cases,
        url=args.url,
        concurrency=args.concurrency,
        timeout_seconds=args.timeout_seconds,
    )
    summary = summarize_chat_benchmark(results)
    rendered = json.dumps(summary, indent=2, ensure_ascii=False)

    if args.output is not None:
        # Create missing parent directories first: without this, a path such
        # as "reports/run1.json" raises FileNotFoundError only after the
        # whole benchmark has finished, and the results are lost.
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(rendered + "\n", encoding="utf-8")
    else:
        print(rendered)

    return 0 if summary["failed_cases"] == 0 else 1
if __name__ == "__main__":
    # Script entry point: drive the async CLI to completion and use its
    # return value as the process exit status.
    sys.exit(asyncio.run(_main()))
|