#!/usr/bin/env python3
"""Concurrent load-test client for an HTTP inference endpoint.

Sends a fixed number of POST requests through a thread pool, collects the
client-side latency plus the ``batch_size`` / ``infer_us`` fields reported by
the server, and prints (optionally also writes) a JSON summary.
"""
from __future__ import annotations

import argparse
import concurrent.futures as cf
import json
import statistics
import time
from pathlib import Path

import requests


def call(url: str) -> tuple[float, int, float]:
    """Send one inference request and return its measurements.

    Args:
        url: Endpoint that receives the POST request.

    Returns:
        A tuple ``(latency_ms, batch_size, infer_ms)`` where ``latency_ms`` is
        the client-side wall time of the request, and the other two values come
        from the first entry of the response's ``"results"`` list
        (``infer_us`` is converted from microseconds to milliseconds).
        Missing fields default to ``0``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or the 30 s timeout.
        KeyError, IndexError: If the response has no ``results[0]`` entry.
    """
    start = time.perf_counter()
    response = requests.post(url, json={"return_boxes": True}, timeout=30)
    response.raise_for_status()
    # Stop the clock before JSON decoding so parsing cost is not counted.
    latency = (time.perf_counter() - start) * 1000.0
    result = response.json()["results"][0]
    batch_size = int(result.get("batch_size", 0))
    infer_ms = float(result.get("infer_us", 0.0)) / 1000.0
    return latency, batch_size, infer_ms


def _percentile(sorted_values: list[float], pct: int) -> float:
    """Return the nearest-rank ``pct``-th percentile of an ascending list."""
    # Integer ceiling division: no float rounding surprises, and the rank is
    # rounded up (truncating would under-report tail latency on small samples).
    rank = (len(sorted_values) * pct + 99) // 100
    return sorted_values[max(rank, 1) - 1]


def run(url: str, concurrency: int, requests_count: int, warmup: int) -> dict:
    """Run the benchmark and return summary statistics.

    Args:
        url: Endpoint passed to :func:`call`.
        concurrency: Number of worker threads issuing requests.
        requests_count: Number of measured requests.
        warmup: Number of sequential, unmeasured requests sent first.

    Returns:
        A JSON-serialisable dict with throughput, latency percentiles
        (milliseconds) and the averages of the server-reported batch size and
        inference time.

    Raises:
        ValueError: If ``concurrency`` or ``requests_count`` is smaller than 1.
    """
    # Validate before the warmup so bad arguments never generate traffic.
    if concurrency < 1:
        raise ValueError("concurrency must be at least 1")
    if requests_count < 1:
        raise ValueError("requests_count must be at least 1")

    for _ in range(warmup):
        call(url)

    start = time.perf_counter()
    with cf.ThreadPoolExecutor(max_workers=concurrency) as ex:
        futs = [ex.submit(call, url) for _ in range(requests_count)]
        samples = [fut.result() for fut in cf.as_completed(futs)]
    elapsed = time.perf_counter() - start

    latencies = sorted(sample[0] for sample in samples)
    batch_sizes = [sample[1] for sample in samples]
    infer_ms = [sample[2] for sample in samples]

    return {
        "server": "rust_dynamic_batcher",
        "concurrency": concurrency,
        "requests": requests_count,
        "pages": requests_count,
        "elapsed_s": elapsed,
        "pages_per_s": requests_count / elapsed,
        "p50_ms": statistics.median(latencies),
        "p95_ms": _percentile(latencies, 95),
        "p99_ms": _percentile(latencies, 99),
        "min_ms": latencies[0],
        "max_ms": latencies[-1],
        "avg_observed_batch": sum(batch_sizes) / len(batch_sizes),
        "avg_engine_infer_ms_per_batch": sum(infer_ms) / len(infer_ms),
    }


def main() -> None:
    """Parse command-line arguments, run the benchmark and emit the JSON report."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--url", default="http://localhost:18082/v1/infer")
    ap.add_argument("--concurrency", type=int, required=True)
    ap.add_argument("--requests", type=int, default=200)
    ap.add_argument("--warmup", type=int, default=10)
    ap.add_argument("--output", type=Path)
    args = ap.parse_args()

    # Report bad counts as a usage error instead of a traceback.
    if args.concurrency < 1:
        ap.error("--concurrency must be at least 1")
    if args.requests < 1:
        ap.error("--requests must be at least 1")

    result = run(args.url, args.concurrency, args.requests, args.warmup)
    report = json.dumps(result, indent=2)
    print(report)
    if args.output:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(report + "\n", encoding="utf-8")


if __name__ == "__main__":
    main()