bndos's picture
Add pp-doclayout server source with score threshold
3c0d3e1 verified
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import concurrent.futures as cf
import json
import statistics
import time
from pathlib import Path
import requests
def call(url: str) -> tuple[float, int, float]:
    """Send one inference request to *url* and report its timing.

    POSTs the JSON body ``{"return_boxes": True}`` with a 30-second timeout
    and reads the first entry of the response's ``results`` list.

    Returns:
        A ``(latency_ms, batch_size, infer_ms)`` tuple:

        * ``latency_ms`` -- client-side wall-clock time of the POST (including
          the status check, excluding JSON decoding), in milliseconds.
        * ``batch_size`` -- the entry's ``batch_size`` field as an int
          (0 when the field is absent).
        * ``infer_ms`` -- the entry's ``infer_us`` field divided by 1000
          (0.0 when absent); presumably microseconds converted to
          milliseconds -- confirm against the server's response schema.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    t0 = time.perf_counter()
    response = requests.post(url, json={"return_boxes": True}, timeout=30)
    response.raise_for_status()
    # Stop the clock before decoding so parsing cost is not counted as latency.
    elapsed_ms = 1000.0 * (time.perf_counter() - t0)

    first = response.json()["results"][0]
    batch_size = int(first.get("batch_size", 0))
    engine_ms = float(first.get("infer_us", 0.0)) / 1000.0
    return elapsed_ms, batch_size, engine_ms
def _nearest_rank(sorted_values: list[float], percent: int) -> float:
    """Return the nearest-rank *percent*-th percentile of an ascending list."""
    # Integer ceiling of len * percent / 100: no float rounding involved, and
    # max() keeps the rank >= 1 so the index can never wrap around to -1.
    rank = max(1, -(-len(sorted_values) * percent // 100))
    return sorted_values[rank - 1]


def run(url: str, concurrency: int, requests_count: int, warmup: int) -> dict:
    """Benchmark *url* with concurrent requests and summarize the results.

    Sends ``warmup`` sequential, unmeasured requests first, then issues
    ``requests_count`` requests through a thread pool of ``concurrency``
    workers and aggregates what each ``call`` reports.

    Args:
        url: Endpoint passed to ``call`` for every request.
        concurrency: Number of worker threads in the pool.
        requests_count: Number of measured requests; must be at least 1.
        warmup: Number of unmeasured requests sent before timing starts.

    Returns:
        A dict with the run parameters (``server``, ``concurrency``,
        ``requests``, ``pages``), the wall-clock ``elapsed_s`` and derived
        ``pages_per_s``, client-side latency statistics in milliseconds
        (``p50_ms``, ``p95_ms``, ``p99_ms``, ``min_ms``, ``max_ms``), and the
        means of the per-request batch size and engine inference time
        (``avg_observed_batch``, ``avg_engine_infer_ms_per_batch``).

    Raises:
        ValueError: If ``requests_count`` is less than 1.
        Exception: Whatever ``call`` raises for a failed request propagates
            unchanged and aborts the run.
    """
    # Fail fast: with no samples every statistic below is undefined, and there
    # is no point sending warm-up traffic for a run that cannot be summarized.
    if requests_count < 1:
        raise ValueError("requests_count must be at least 1")

    for _ in range(warmup):
        call(url)

    latencies = []
    batch_sizes = []
    infer_ms = []
    start = time.perf_counter()
    with cf.ThreadPoolExecutor(max_workers=concurrency) as ex:
        futs = [ex.submit(call, url) for _ in range(requests_count)]
        for fut in cf.as_completed(futs):
            lat, batch_size, infer = fut.result()
            latencies.append(lat)
            batch_sizes.append(batch_size)
            infer_ms.append(infer)
    elapsed = time.perf_counter() - start

    latencies.sort()
    return {
        "server": "rust_dynamic_batcher",
        "concurrency": concurrency,
        "requests": requests_count,
        "pages": requests_count,
        "elapsed_s": elapsed,
        "pages_per_s": requests_count / elapsed,
        "p50_ms": statistics.median(latencies),
        # Nearest-rank percentiles: truncating the rank would under-report the
        # tail whenever the sample count is not a multiple of 20 (p95) / 100 (p99).
        "p95_ms": _nearest_rank(latencies, 95),
        "p99_ms": _nearest_rank(latencies, 99),
        "min_ms": latencies[0],
        "max_ms": latencies[-1],
        "avg_observed_batch": sum(batch_sizes) / len(batch_sizes),
        "avg_engine_infer_ms_per_batch": sum(infer_ms) / len(infer_ms),
    }
def main() -> None:
    """Parse command-line options, run the benchmark, and report the result.

    The summary returned by ``run`` is printed to stdout as indented JSON.
    When ``--output`` is given, the same JSON followed by a newline is also
    written to that path (UTF-8), creating parent directories as needed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", default="http://localhost:18082/v1/infer")
    parser.add_argument("--concurrency", type=int, required=True)
    parser.add_argument("--requests", type=int, default=200)
    parser.add_argument("--warmup", type=int, default=10)
    parser.add_argument("--output", type=Path)
    opts = parser.parse_args()

    summary = run(opts.url, opts.concurrency, opts.requests, opts.warmup)
    # Serialize once; stdout and the optional file receive the same text.
    report = json.dumps(summary, indent=2)
    print(report)

    destination = opts.output
    if not destination:
        return
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(report + "\n", encoding="utf-8")
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()