Knowledge-Universe / scripts /benchmark.py
vlsiddarth's picture
Block 3: auth, decay engine, signup, usage, landing page, all empty files filled
b32fbe0
#!/usr/bin/env python3
"""
Knowledge Universe — Benchmark Script
Tests API response times across all crawlers.
Usage: python scripts/benchmark.py
"""
import asyncio
import os
import time

import httpx
# Target server and credentials. Both can be overridden from the environment
# (KU_API_BASE / KU_API_KEY) so a real key never has to be written into this
# file; the defaults are the original hard-coded values.
# A trailing slash is stripped because endpoints are appended as "/path".
API_BASE = os.environ.get("KU_API_BASE", "http://localhost:8000").rstrip("/")
API_KEY = os.environ.get("KU_API_KEY", "ku_test_benchmark")  # placeholder test key

# Request payloads to benchmark; each one is sent twice (cold, then warm).
QUERIES = [
    {"topic": "transformer architecture", "difficulty": 3},
    {"topic": "retrieval augmented generation", "difficulty": 3},
    {"topic": "python data science", "difficulty": 2},
]
async def benchmark_query(client, query: dict) -> dict:
    """Time one POST to ``/v1/discover`` and summarize the outcome.

    Args:
        client: Object exposing an awaitable
            ``post(url, headers=..., json=..., timeout=...)``; ``main``
            passes an ``httpx.AsyncClient``.
        query: Request payload fields. Must contain ``"topic"``; the fixed
            ``formats`` and ``max_results`` fields are merged on top.

    Returns:
        When the request completes: a dict with ``topic``, ``status`` (the
        HTTP status code), ``ms`` (elapsed milliseconds, one decimal),
        ``results`` and ``cache_hit``. When the request itself fails: a dict
        with ``topic``, ``status`` set to ``"ERROR"``, ``ms`` and ``error``.
    """
    # perf_counter is monotonic; time.time() can jump if the system clock
    # is adjusted mid-request, which would corrupt the measurement.
    start = time.perf_counter()
    try:
        resp = await client.post(
            f"{API_BASE}/v1/discover",
            headers={"X-API-Key": API_KEY},
            json={**query, "formats": ["pdf", "github", "stackoverflow"], "max_results": 5},
            timeout=30,
        )
    except Exception as e:
        # Deliberately broad: one failed request (timeout, refused
        # connection, ...) is recorded rather than aborting the whole run.
        elapsed = round((time.perf_counter() - start) * 1000, 1)
        return {"topic": query["topic"], "status": "ERROR", "ms": elapsed, "error": str(e)}

    # Stop the clock before decoding so body parsing is not counted.
    elapsed = round((time.perf_counter() - start) * 1000, 1)

    # An error response may carry a non-JSON body (e.g. an HTML error page).
    # Keep the real status code instead of degrading the result to "ERROR".
    try:
        data = resp.json()
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        data = {}
    if not isinstance(data, dict):
        data = {}

    return {
        "topic": query["topic"],
        "status": resp.status_code,
        "ms": elapsed,
        "results": data.get("total_found", 0),
        "cache_hit": data.get("cache_hit", False),
    }
def _format_run(label: str, run: dict) -> str:
    """Render one benchmark result as a report line, flagging failures."""
    line = f" {label}: {run['ms']}ms | {run.get('results', 0)} results | cache={run.get('cache_hit')}"
    status = run.get("status")
    if status != 200:
        # Surface the recorded error (or the HTTP status) so a failed run
        # is not mistaken for a fast request with zero results.
        reason = run.get("error") or f"HTTP {status}"
        line += f" | FAILED: {reason}"
    return line


def _print_average(label: str, runs: list) -> None:
    """Print the mean latency of the successful (HTTP 200) runs."""
    times = [run["ms"] for run in runs if run.get("status") == 200]
    if times:
        print(f"Average {label}: {sum(times)/len(times):.0f}ms")
    else:
        print(f"Average {label}: n/a (no successful requests)")


async def main():
    """Run the benchmark: health check, cold/warm query pairs, summary.

    Prints a banner, checks ``/health`` (returning early if the API cannot
    be reached), sends every entry of ``QUERIES`` twice through
    ``benchmark_query`` and prints per-query timings followed by the average
    cold and warm latency over the requests that returned HTTP 200.
    """
    print(f"\n{'='*60}")
    print("KNOWLEDGE UNIVERSE — BENCHMARK")
    print(f"{'='*60}\n")

    results = []
    async with httpx.AsyncClient() as client:
        # Health check
        try:
            health = await client.get(f"{API_BASE}/health", timeout=5)
        except Exception as e:
            print(f"API unreachable: {e}\nStart with: uvicorn main:app --reload\n")
            return

        # A reachable server with an unexpected health payload is reported
        # as "unknown" rather than being mislabelled as unreachable.
        try:
            payload = health.json()
        except ValueError:
            payload = {}
        status = payload.get("status", "unknown") if isinstance(payload, dict) else "unknown"
        print(f"API Status: {status}\n")

        # Run benchmarks
        for query in QUERIES:
            # Run twice — first for cold, second for cache
            cold = await benchmark_query(client, query)
            warm = await benchmark_query(client, query)
            results.append({"cold": cold, "warm": warm})
            print(f"Topic: {query['topic']}")
            print(_format_run("Cold", cold))
            print(_format_run("Warm", warm))
            print()

    # Summary
    _print_average("cold", [pair["cold"] for pair in results])
    _print_average("warm", [pair["warm"] for pair in results])
# Script entry point: run the async benchmark only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())