# NOTE(review): the following banner is Hugging Face Spaces UI residue captured
# with the paste ("Spaces: Sleeping") — kept as a comment so the file parses.
import requests
import time
from concurrent.futures import ThreadPoolExecutor
import torch

# Endpoints of the locally running analysis service under test.
API_URL = "http://127.0.0.1:8000/analyze"
VRAM_URL = "http://127.0.0.1:8000/vram-status"

TOTAL_REQUESTS = 50  # Total number of requests to send
CONCURRENT_USERS = 5  # Number of users attacking concurrently

# Request body sent to /analyze on every call (Turkish sample text;
# "platform_dil" presumably selects the analysis language — TODO confirm
# against the API schema).
payload = {
    "text": "Bu bir performans testidir. Sistemimiz hem Türkçe hem İngilizce içerikleri başarıyla analiz edebiliyor.",
    "platform_dil": "tr",
}
def send_request(_):
    """POST the test payload to the analyze endpoint and return latency in ms.

    The single argument is ignored; it only exists so the function can be
    fanned out with ``executor.map(send_request, range(N))``.

    Returns:
        float: wall-clock round-trip time of the request in milliseconds.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the request exceeds the timeout below.
    """
    start = time.time()
    # Fix: requests has NO default timeout — without one, a stalled server
    # hangs the worker thread (and thus the whole stress test) forever.
    response = requests.post(API_URL, json=payload, timeout=60)
    response.raise_for_status()
    return (time.time() - start) * 1000
print(f"🔥 Stress Test Başlatılıyor: {TOTAL_REQUESTS} istek, {CONCURRENT_USERS} eşzamanlı kanal...")

with ThreadPoolExecutor(max_workers=CONCURRENT_USERS) as executor:
    # Warm-up request: absorbs cold-start cost (model load, caches) so it
    # does not skew the measured latencies; intentionally excluded from
    # the results. Best-effort — a failure here is tolerated, but a
    # timeout is set so a dead server cannot hang the script (fix: the
    # original call had no timeout and could block forever).
    try:
        requests.post(API_URL, json=payload, timeout=60)
    except requests.RequestException:
        print("⚠️ Warm-up request failed; continuing with the timed run.")

    # Time only the measured burst of TOTAL_REQUESTS calls.
    start_time = time.time()
    latencies = list(executor.map(send_request, range(TOTAL_REQUESTS)))
    total_duration = time.time() - start_time

# Aggregate metrics for the report below.
avg_latency = sum(latencies) / len(latencies)
rps = TOTAL_REQUESTS / total_duration
# Label the runtime so results from different machines are comparable.
runtime_label = (
    f"GPU - {torch.cuda.get_device_name(0)} Üzerinde"
    if torch.cuda.is_available()
    else "CPU Üzerinde"
)

# Print the summary banner with the aggregated metrics.
banner = "=" * 40
print("\n" + banner)
print(f"📊 SONUÇLAR ({runtime_label})")
print("-" * 40)
print(f"⏱️ Ortalama Gecikme: {avg_latency:.2f} ms")
print(f"🚀 Saniyedeki İstek (RPS): {rps:.2f} req/sec")
print(f"⌛ Toplam Süre: {total_duration:.2f} saniye")
print(banner)
print("\n🔎 VRAM Snapshot (/vram-status)")

# Query the service's VRAM status endpoint; any transport/HTTP/JSON failure
# is reported instead of crashing the script after a successful test run.
try:
    vram_resp = requests.get(VRAM_URL, timeout=10)
    vram_resp.raise_for_status()
    vram = vram_resp.json()
except requests.RequestException as exc:
    print(f"⚠️ VRAM endpoint erişilemedi: {exc}")
else:
    if not vram.get("cuda_available"):
        # Server reports no CUDA; show its message (or a fallback).
        print(f"ℹ️ {vram.get('message', 'CUDA aktif değil.')}")
    else:
        print(f"📟 GPU: {vram.get('gpu_name', 'Bilinmiyor')}")
        print(f"🔥 Allocated: {vram.get('allocated_mb', 0)} MB")
        print(f"🛡️ Reserved: {vram.get('reserved_mb', 0)} MB")
        print(f"🆓 Free (Tahmini): {vram.get('free_estimate_mb', 0)} MB")