bioflow / scripts /benchmark_search_api.py
ramiiiiiiiiiiiiiiiiiiiiiiiiiiiiii's picture
Fix explorer/ingestion UI and 3D endpoints
673a52e
#!/usr/bin/env python3
"""
Benchmark /api/search latency and error rate.
Usage:
python scripts/benchmark_search_api.py --runs 50 --concurrency 5
"""
from __future__ import annotations
import argparse
import concurrent.futures as cf
import statistics
import time
from typing import Any, Dict, List, Tuple
import requests
# Search phrases used as the benchmark workload; main() repeats this list
# as many times as needed and truncates it to exactly --runs entries.
DEFAULT_QUERIES = [
    "EGFR inhibitor",
    "BRCA1 breast cancer",
    "kinase inhibitor therapy",
    "TP53 mutation cancer",
    "lung cancer EGFR signaling",
]
def _one(base_url: str, query: str, top_k: int, use_mmr: bool) -> Tuple[bool, float, str]:
    """Send one search request and time it.

    Args:
        base_url: Scheme and host of the API, e.g. ``http://localhost:8000``.
            Trailing slashes are ignored.
        query: Search text sent as the ``query`` field.
        top_k: Value sent as the ``top_k`` field.
        use_mmr: Value sent as the ``use_mmr`` field.

    Returns:
        A ``(ok, latency_ms, error)`` tuple. ``ok`` is True only for an
        HTTP 200 response. ``latency_ms`` is the wall-clock time from just
        before the POST is issued until ``requests.post`` returns or raises.
        ``error`` is an empty string on success, otherwise a short,
        non-empty reason.
    """
    # Strip trailing slashes so "http://host/" does not yield "//api/search".
    url = f"{base_url.rstrip('/')}/api/search"
    payload = {"query": query, "top_k": top_k, "use_mmr": use_mmr, "modality": "auto"}

    t0 = time.perf_counter()
    try:
        r = requests.post(url, json=payload, timeout=30)
    except Exception as e:
        # Deliberately broad: a failing request must be recorded as a
        # benchmark error rather than crash the worker thread.
        dt_ms = (time.perf_counter() - t0) * 1000.0
        # Some exceptions stringify to ""; fall back to the class name so
        # the reported reason is never blank.
        return False, dt_ms, str(e) or type(e).__name__

    dt_ms = (time.perf_counter() - t0) * 1000.0
    if r.status_code != 200:
        return False, dt_ms, f"HTTP {r.status_code}"
    return True, dt_ms, ""
def main() -> int:
    """Run the /api/search benchmark and print a latency/error summary.

    Parses the command-line flags, issues ``--runs`` search requests through
    a thread pool of ``--concurrency`` workers, then prints p50/p95/p99/mean
    latency (computed over all requests, failed ones included) and up to
    five sample error messages.

    Returns:
        1 if at least one request failed, otherwise 0, so the script can
        gate a CI job.

    Raises:
        SystemExit: With status 2 (via argparse) when ``--runs`` or
            ``--concurrency`` is not a positive integer.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--base-url", default="http://localhost:8000")
    ap.add_argument("--runs", type=int, default=50, help="Total number of requests (>= 1)")
    ap.add_argument("--concurrency", type=int, default=5, help="Worker threads (>= 1)")
    ap.add_argument("--top-k", type=int, default=20)
    ap.add_argument("--mmr", action="store_true", help="Enable MMR")
    args = ap.parse_args()

    # Reject values that would otherwise surface as an IndexError on the
    # empty latency list or a ValueError from ThreadPoolExecutor.
    if args.runs < 1:
        ap.error("--runs must be at least 1")
    if args.concurrency < 1:
        ap.error("--concurrency must be at least 1")

    # Repeat the query list enough times to cover --runs, then truncate.
    queries = (DEFAULT_QUERIES * ((args.runs // len(DEFAULT_QUERIES)) + 1))[: args.runs]

    latencies: List[float] = []
    errors: List[str] = []
    with cf.ThreadPoolExecutor(max_workers=args.concurrency) as ex:
        futures = [
            ex.submit(_one, args.base_url, q, args.top_k, bool(args.mmr))
            for q in queries
        ]
        for f in cf.as_completed(futures):
            ok, dt_ms, err = f.result()
            latencies.append(dt_ms)
            if not ok:
                errors.append(err)

    latencies.sort()

    def _pct(fraction: float) -> float:
        # Floor-index percentile over the sorted latencies.
        return latencies[int(fraction * (len(latencies) - 1))]

    p50 = _pct(0.50)
    p95 = _pct(0.95)
    p99 = _pct(0.99)

    print("=" * 60)
    print("BioFlow /api/search Benchmark")
    print("=" * 60)
    print(f"Runs: {args.runs} | Concurrency: {args.concurrency} | top_k: {args.top_k} | mmr: {bool(args.mmr)}")
    print(f"OK: {args.runs - len(errors)} | Errors: {len(errors)}")
    print(f"p50: {p50:.1f}ms | p95: {p95:.1f}ms | p99: {p99:.1f}ms | mean: {statistics.mean(latencies):.1f}ms")
    if errors:
        print("Sample errors:")
        for e in errors[:5]:
            print(f" - {e}")

    # Non-zero exit on errors to allow CI usage.
    return 1 if errors else 0
if __name__ == "__main__":
    # Use main()'s return value (0 or 1) as the process exit status.
    raise SystemExit(main())