#!/usr/bin/env python3
"""
Benchmark /api/search latency and error rate.

Usage:
    python scripts/benchmark_search_api.py --runs 50 --concurrency 5
"""
from __future__ import annotations

import argparse
import concurrent.futures as cf
import statistics
import time
from typing import List, Tuple

import requests
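# NOTE: requests is the only third-party dependency; everything else is stdlib.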

DEFAULT_QUERIES = [
    "EGFR inhibitor",
    "BRCA1 breast cancer",
    "kinase inhibitor therapy",
    "TP53 mutation cancer",
    "lung cancer EGFR signaling",
]
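# The pool above is cycled in main() so --runs can exceed len(DEFAULT_QUERIES).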


def _one(base_url: str, query: str, top_k: int, use_mmr: bool) -> Tuple[bool, float, str]:
    """POST a single search request; return (ok, latency_ms, error_message)."""
    t0 = time.perf_counter()
    try:
        r = requests.post(
            f"{base_url}/api/search",
            json={"query": query, "top_k": top_k, "use_mmr": use_mmr, "modality": "auto"},
            timeout=30,
        )
        dt_ms = (time.perf_counter() - t0) * 1000.0
        if r.status_code != 200:
            return False, dt_ms, f"HTTP {r.status_code}"
        return True, dt_ms, ""
    except Exception as e:
        # Timeouts and connection errors count as failures but still yield a
        # latency sample, so tail percentiles reflect slow failures too.
        dt_ms = (time.perf_counter() - t0) * 1000.0
        return False, dt_ms, str(e)


def main() -> int:
    ap = argparse.ArgumentParser()
    ap.add_argument("--base-url", default="http://localhost:8000")
    ap.add_argument("--runs", type=int, default=50)
    ap.add_argument("--concurrency", type=int, default=5)
    ap.add_argument("--top-k", type=int, default=20)
    ap.add_argument("--mmr", action="store_true", help="Enable MMR")
    args = ap.parse_args()

    queries = (DEFAULT_QUERIES * ((args.runs // len(DEFAULT_QUERIES)) + 1))[: args.runs]

    latencies: List[float] = []
    errors: List[str] = []
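    # Fan the requests out across a thread pool; results are consumed in
    # completion order, which is fine because only aggregates are reported.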
    with cf.ThreadPoolExecutor(max_workers=args.concurrency) as ex:
        futures = [
            ex.submit(_one, args.base_url, q, args.top_k, bool(args.mmr))
            for q in queries
        ]
        for f in cf.as_completed(futures):
            ok, dt_ms, err = f.result()
            latencies.append(dt_ms)
            if not ok:
                errors.append(err)

    if not latencies:
        # Guard against --runs 0, which would otherwise raise IndexError below.
        print("No runs executed (--runs must be >= 1).")
        return 1

    # Percentiles by index into the sorted list: floor(p * (n - 1)).
    latencies.sort()
    p50 = latencies[int(0.50 * (len(latencies) - 1))]
    p95 = latencies[int(0.95 * (len(latencies) - 1))]
    p99 = latencies[int(0.99 * (len(latencies) - 1))]
| print("=" * 60) | |
| print("BioFlow /api/search Benchmark") | |
| print("=" * 60) | |
| print(f"Runs: {args.runs} | Concurrency: {args.concurrency} | top_k: {args.top_k} | mmr: {bool(args.mmr)}") | |
| print(f"OK: {args.runs - len(errors)} | Errors: {len(errors)}") | |
| print(f"p50: {p50:.1f}ms | p95: {p95:.1f}ms | p99: {p99:.1f}ms | mean: {statistics.mean(latencies):.1f}ms") | |
| if errors: | |
| print("Sample errors:") | |
| for e in errors[:5]: | |
| print(f" - {e}") | |
| # Non-zero exit on errors to allow CI usage. | |
| return 1 if errors else 0 | |
| if __name__ == "__main__": | |
| raise SystemExit(main()) | |
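
# Example CI invocation (assumes a local server on port 8000, matching the
# default --base-url); the non-zero exit code fails the step on any error:
#
#   python scripts/benchmark_search_api.py --runs 100 --concurrency 10 --mmr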