# File size: 2,877 Bytes
# b32fbe0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
"""
Knowledge Universe — Benchmark Script
Tests API response times across all crawlers.
Usage: python scripts/benchmark.py
"""
import asyncio
import time
import httpx

# Base URL of the API under test.
API_BASE = "http://localhost:8000"
# Key sent with each benchmark request; replace with a real test key.
API_KEY = "ku_test_benchmark"

# Benchmark workload, written as (topic, difficulty) pairs and expanded into
# the request-parameter dicts consumed by the benchmark.
QUERIES = [
    {"topic": topic, "difficulty": difficulty}
    for topic, difficulty in (
        ("transformer architecture", 3),
        ("retrieval augmented generation", 3),
        ("python data science", 2),
    )
]


async def benchmark_query(client, query: dict) -> dict:
    """Time a single POST to ``/v1/discover`` and summarize the outcome.

    Args:
        client: Object exposing an awaitable
            ``post(url, headers=..., json=..., timeout=...)`` method
            (``main`` passes an ``httpx.AsyncClient``).
        query: Request parameters; must contain ``"topic"``. Its items are
            sent in the JSON body together with fixed ``formats`` and
            ``max_results`` values.

    Returns:
        When a response arrives and its body parses as a JSON object: a dict
        with ``topic``, ``status`` (the HTTP status code), ``ms`` (elapsed
        milliseconds, rounded to one decimal), ``results`` (the body's
        ``total_found``, default 0) and ``cache_hit`` (default False).
        When anything raises: a dict with ``topic``, ``status`` set to the
        string ``"ERROR"``, ``ms`` and ``error`` (the exception text).
    """
    # perf_counter() is monotonic and high-resolution. time.time() follows the
    # wall clock, which can jump (NTP sync, manual change) and corrupt the
    # measured interval.
    start = time.perf_counter()
    try:
        resp = await client.post(
            f"{API_BASE}/v1/discover",
            headers={"X-API-Key": API_KEY},
            json={**query, "formats": ["pdf", "github", "stackoverflow"], "max_results": 5},
            timeout=30,
        )
        # Stop the clock before parsing so JSON decoding is not counted.
        elapsed = round((time.perf_counter() - start) * 1000, 1)
        data = resp.json()
        return {
            "topic": query["topic"],
            "status": resp.status_code,
            "ms": elapsed,
            "results": data.get("total_found", 0),
            "cache_hit": data.get("cache_hit", False),
        }
    except Exception as e:
        # Deliberately broad: a benchmark run should record the failure and
        # move on to the next query rather than abort the whole session.
        elapsed = round((time.perf_counter() - start) * 1000, 1)
        return {"topic": query["topic"], "status": "ERROR", "ms": elapsed, "error": str(e)}


async def main():
    """Run every benchmark query twice against the API and print a report.

    Steps:
      1. Print a banner.
      2. GET ``{API_BASE}/health``; if that raises for any reason, print a
         start-up hint and return without benchmarking.
      3. For each entry in ``QUERIES``, await ``benchmark_query`` twice in
         sequence (the second call is intended to exercise the cache) and
         print both timings. Runs that did not return HTTP 200 also show
         their status and, when present, the captured error text.
      4. Print the mean latency of the cold and warm runs, counting only
         runs whose status was 200, or a notice when there were none.
    """

    def format_run(label: str, run: dict) -> str:
        """Render one result line, appending failure details when relevant."""
        line = (
            f"  {label}: {run['ms']}ms  | {run.get('results',0)} results"
            f" | cache={run.get('cache_hit')}"
        )
        # Without these suffixes a failed request is indistinguishable from
        # a successful one that simply found nothing.
        if run.get("status") != 200:
            line += f" | status={run.get('status')}"
        if "error" in run:
            line += f" | error={run['error']}"
        return line

    banner = "=" * 60
    print(f"\n{banner}")
    print("KNOWLEDGE UNIVERSE — BENCHMARK")
    print(f"{banner}\n")

    async with httpx.AsyncClient() as client:
        # Health check: bail out early instead of timing a dead server.
        try:
            health = await client.get(f"{API_BASE}/health", timeout=5)
            print(f"API Status: {health.json().get('status', 'unknown')}\n")
        except Exception as e:
            print(f"API unreachable: {e}\nStart with: uvicorn main:app --reload\n")
            return

        # Run benchmarks sequentially so requests do not compete with each
        # other and the second call follows the first for the same query.
        results = []
        for query in QUERIES:
            cold = await benchmark_query(client, query)
            warm = await benchmark_query(client, query)
            results.append({"cold": cold, "warm": warm})

            print(f"Topic: {query['topic']}")
            print(format_run("Cold", cold))
            print(format_run("Warm", warm))
            print()

        # Summary: only HTTP 200 runs contribute to the averages.
        cold_times = [r["cold"]["ms"] for r in results if r["cold"].get("status") == 200]
        warm_times = [r["warm"]["ms"] for r in results if r["warm"].get("status") == 200]

        if cold_times:
            print(f"Average cold: {sum(cold_times)/len(cold_times):.0f}ms")
        if warm_times:
            print(f"Average warm: {sum(warm_times)/len(warm_times):.0f}ms")
        if not cold_times and not warm_times:
            print("No successful (HTTP 200) runs; no averages to report.")

# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())