maris-ai-master/core-python/scripts/benchmark_chat.py
MarisUK's picture
Maris AI model sync
f440f03 verified
"""CLI bootstrap chat benchmark runner."""
from __future__ import annotations
import argparse
import asyncio
import json
import sys
from pathlib import Path
# Directory two levels above this file's resolved location (the parent of the
# folder containing this script).
PROJECT_ROOT = Path(__file__).resolve().parents[1]

# Put that directory at the front of the import search path, once — presumably
# so the ``maris_core`` import that follows resolves when run as a script.
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)
from maris_core.text.benchmark import ( # noqa: E402
load_chat_benchmark_dataset,
run_chat_benchmark,
summarize_chat_benchmark,
)
async def _main() -> int:
    """Run the chat benchmark from the command line and report its summary.

    Parses the CLI arguments, loads the eval cases from the dataset file,
    runs them via ``run_chat_benchmark`` and summarizes the results. The
    summary is rendered as indented JSON and either written to ``--output``
    (UTF-8, trailing newline) or printed to stdout.

    Returns:
        ``0`` when the summary's ``failed_cases`` entry equals zero,
        ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Palaiž Maris chat benchmark runner ar JSON eval datasetu"
    )
    parser.add_argument(
        "dataset",
        nargs="?",
        default=PROJECT_ROOT / "evals" / "chat_eval_dataset.json",
        type=Path,
        help="JSON eval dataset fails",
    )
    parser.add_argument(
        "--url",
        default="http://localhost:8000/v1/text/generate",
        help="Pilns core-python text endpoint URL",
    )
    parser.add_argument(
        "--concurrency", type=int, default=1, help="Vienlaicīgo requestu skaits"
    )
    parser.add_argument(
        "--timeout-seconds", type=float, default=120.0, help="HTTP timeout sekundēs"
    )
    parser.add_argument("--output", type=Path, help="Kur saglabāt JSON rezultātu")
    args = parser.parse_args()

    cases = load_chat_benchmark_dataset(args.dataset)
    results = await run_chat_benchmark(
        cases,
        url=args.url,
        concurrency=args.concurrency,
        timeout_seconds=args.timeout_seconds,
    )
    summary = summarize_chat_benchmark(results)

    # ensure_ascii=False keeps non-ASCII text readable in the rendered JSON.
    rendered = json.dumps(summary, indent=2, ensure_ascii=False)
    if args.output is not None:
        # Create missing parent directories first: without this a typo-free
        # but not-yet-existing output folder would raise FileNotFoundError
        # only after the whole benchmark has already run, losing the results.
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(rendered + "\n", encoding="utf-8")
    else:
        print(rendered)

    # Non-zero exit status signals at least one failed case to the caller.
    return 0 if summary["failed_cases"] == 0 else 1
if __name__ == "__main__":
    # Drive the async entry point to completion and use its return value
    # as the process exit status.
    exit_code = asyncio.run(_main())
    sys.exit(exit_code)