#!/usr/bin/env python3
"""
Knowledge Universe — Benchmark Script
Tests API response times across all crawlers.

Every query in QUERIES is sent twice back to back: the first request is
reported as the "cold" run and the second as the "warm" run. Averages are
computed over the runs that returned HTTP 200.

Usage:
    python scripts/benchmark.py
"""

import asyncio
import time

import httpx

API_BASE = "http://localhost:8000"
API_KEY = "ku_test_benchmark"  # Replace with a real test key

QUERIES = [
    {"topic": "transformer architecture", "difficulty": 3},
    {"topic": "retrieval augmented generation", "difficulty": 3},
    {"topic": "python data science", "difficulty": 2},
]


def _elapsed_ms(start: float) -> float:
    """Return the milliseconds elapsed since *start*, rounded to 0.1 ms.

    *start* must be a ``time.perf_counter()`` reading.
    """
    return round((time.perf_counter() - start) * 1000, 1)


def _format_run(label: str, run: dict) -> str:
    """Render one benchmark result (as returned by ``benchmark_query``) as a report line."""
    prefix = f"  {label}: {run['ms']}ms"
    if "error" in run:
        # Show the failure instead of a misleading "0 results | cache=None".
        return f"{prefix} | ERROR: {run['error']}"
    line = f"{prefix} | {run.get('results', 0)} results | cache={run.get('cache_hit')}"
    if run.get("status") != 200:
        # Non-200 runs are excluded from the averages, so make them visible.
        line += f" | HTTP {run['status']}"
    return line


async def benchmark_query(client, query: dict) -> dict:
    """POST one discovery query and time the round trip.

    Args:
        client: An ``httpx.AsyncClient`` (or any object with a compatible
            awaitable ``post`` method).
        query: Request payload fields; must contain ``"topic"``. The fixed
            ``formats`` and ``max_results`` fields are added to it.

    Returns:
        On a completed request: ``{"topic", "status", "ms", "results",
        "cache_hit"}`` where ``status`` is the HTTP status code and ``ms`` is
        the time until the response was received. If the request itself
        fails: ``{"topic", "status": "ERROR", "ms", "error"}``.
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    start = time.perf_counter()
    try:
        resp = await client.post(
            f"{API_BASE}/v1/discover",
            headers={"X-API-Key": API_KEY},
            json={**query, "formats": ["pdf", "github", "stackoverflow"], "max_results": 5},
            timeout=30,
        )
        # Stop the clock before parsing so JSON decoding is not counted.
        elapsed = _elapsed_ms(start)
        try:
            data = resp.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page): keep the real
            # status code rather than reporting the whole run as "ERROR".
            data = {}
        if not isinstance(data, dict):
            data = {}
        return {
            "topic": query["topic"],
            "status": resp.status_code,
            "ms": elapsed,
            "results": data.get("total_found", 0),
            "cache_hit": data.get("cache_hit", False),
        }
    except Exception as e:
        # Deliberately broad: one failing query (timeout, connection reset,
        # ...) must not abort the remaining benchmarks.
        return {"topic": query["topic"], "status": "ERROR", "ms": _elapsed_ms(start), "error": str(e)}


async def main():
    """Run the health check, benchmark every query cold and warm, and print a summary."""
    print(f"\n{'='*60}")
    print("KNOWLEDGE UNIVERSE — BENCHMARK")
    print(f"{'='*60}\n")

    async with httpx.AsyncClient() as client:
        # Health check
        try:
            health = await client.get(f"{API_BASE}/health", timeout=5)
            print(f"API Status: {health.json().get('status', 'unknown')}\n")
        except Exception as e:
            print(f"API unreachable: {e}\nStart with: uvicorn main:app --reload\n")
            return

        # Run benchmarks
        results = []
        for query in QUERIES:
            # Run twice — first for cold, second for cache
            cold = await benchmark_query(client, query)
            warm = await benchmark_query(client, query)
            results.append({"cold": cold, "warm": warm})

            print(f"Topic: {query['topic']}")
            print(_format_run("Cold", cold))
            print(_format_run("Warm", warm))
            print()

    # Summary — only runs that returned HTTP 200 count towards the averages.
    cold_times = [r["cold"]["ms"] for r in results if r["cold"].get("status") == 200]
    warm_times = [r["warm"]["ms"] for r in results if r["warm"].get("status") == 200]

    if cold_times:
        print(f"Average cold: {sum(cold_times)/len(cold_times):.0f}ms")
    if warm_times:
        print(f"Average warm: {sum(warm_times)/len(warm_times):.0f}ms")
    if not cold_times and not warm_times:
        print("No successful (HTTP 200) responses — nothing to average.")


if __name__ == "__main__":
    asyncio.run(main())