# Latency benchmark for the locally hosted recommendation service.
import requests
import time
import numpy as np
import sys
BASE_URL = "http://localhost:6006"
def wait_for_service(timeout=60):
    """Poll the service's /health endpoint until it answers 200 or *timeout* elapses.

    Returns True as soon as the service responds with HTTP 200,
    False if the deadline passes without a healthy response.
    """
    started = time.time()
    deadline = started + timeout
    print("Waiting for service to be ready...")
    while time.time() < deadline:
        response = None
        try:
            response = requests.get(f"{BASE_URL}/health", timeout=1)
        except requests.exceptions.RequestException:
            # Service not up yet (connection refused / timed out) — keep polling.
            pass
        if response is not None and response.status_code == 200:
            print(f"Service ready in {time.time() - started:.2f}s")
            return True
        time.sleep(1)
    print("Service timed out.")
    return False
def benchmark_endpoint(name, url, method="GET", json_body=None, n=10, warmup=2):
    """Benchmark one HTTP endpoint and print latency statistics.

    Parameters:
        name: label used in the printed report.
        url: full URL to request.
        method: "GET" issues GETs; any other value issues POSTs.
        json_body: JSON payload sent with POST requests.
        n: number of measured requests.
        warmup: unmeasured requests sent first (primes connections/caches).

    Failed requests are reported inline and excluded from the stats;
    prints nothing but a failure notice if every request fails.
    """
    print(f"\nBenchmarking {name} ({method} {url})...")
    latencies = []
    # Warmup: results discarded, errors ignored. Catch only network errors —
    # the original bare `except:` also trapped KeyboardInterrupt/SystemExit,
    # making the script hard to interrupt during warmup.
    for _ in range(warmup):
        try:
            if method == "GET":
                requests.get(url)
            else:
                requests.post(url, json=json_body)
        except requests.exceptions.RequestException:
            pass
    # Test
    for _ in range(n):
        start = time.perf_counter()
        try:
            if method == "GET":
                resp = requests.get(url)
            else:
                resp = requests.post(url, json=json_body)
            # HTTPError is a RequestException subclass, so HTTP 4xx/5xx still
            # counts as a failure rather than a recorded latency.
            resp.raise_for_status()
            duration = (time.perf_counter() - start) * 1000  # ms
            latencies.append(duration)
            sys.stdout.write(".")
            sys.stdout.flush()
        except requests.exceptions.RequestException as e:
            # Narrowed from `except Exception`: a genuine bug (e.g. TypeError)
            # should surface instead of being counted as a failed request.
            print(f"E({e})")
    print("\n")
    if not latencies:
        print("All requests failed.")
        return
    print(f"Results for {name}:")
    print(f" Count: {len(latencies)}")
    print(f" Mean: {np.mean(latencies):.2f} ms")
    print(f" Median: {np.median(latencies):.2f} ms")
    print(f" P95: {np.percentile(latencies, 95):.2f} ms")
    print(f" Min: {np.min(latencies):.2f} ms")
    print(f" Max: {np.max(latencies):.2f} ms")
def main():
    """Wait for the service, then run the three benchmark scenarios."""
    # Extended readiness window: model loading can take a while.
    if not wait_for_service(timeout=120):
        sys.exit(1)

    scenarios = [
        # 1. Personalized Recommendations (Cold -> Warm)
        dict(
            name="Personalized Recs (Cached/Computed)",
            url=f"{BASE_URL}/api/recommend/personal?user_id=local&top_k=20",
        ),
        # 2. Search (Vector DB)
        dict(
            name="Semantic Search 'machine learning'",
            url=f"{BASE_URL}/recommend",
            method="POST",
            json_body={"query": "machine learning", "category": "All", "tone": "All"},
        ),
        # 3. Book Details — the favorites list endpoint exercises the
        #    per-item metadata lookups we want to measure.
        dict(
            name="Favorites List (Metadata Lookup)",
            url=f"{BASE_URL}/favorites/list/local",
        ),
    ]
    for spec in scenarios:
        benchmark_endpoint(n=20, **spec)


if __name__ == "__main__":
    main()