File size: 2,226 Bytes
857d4f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import requests
import time
from concurrent.futures import ThreadPoolExecutor
import torch

API_URL = "http://127.0.0.1:8000/analyze"
VRAM_URL = "http://127.0.0.1:8000/vram-status"
TOTAL_REQUESTS = 50  # Total number of requests to send
CONCURRENT_USERS = 5  # Number of concurrent client threads hammering the API

# Request body POSTed to /analyze on every call (Turkish sample text,
# language code "tr"); kept constant so all latency samples are comparable.
payload = {
    "text": "Bu bir performans testidir. Sistemimiz hem Türkçe hem İngilizce içerikleri başarıyla analiz edebiliyor.",
    "platform_dil": "tr",
}


def send_request(_):
    """POST the shared payload to the analyze endpoint and return latency in ms.

    The argument is ignored; it only exists so the function can be driven by
    ``executor.map(..., range(N))``. Raises ``requests.HTTPError`` for a
    non-2xx response and ``requests.Timeout`` if the server stalls.
    """
    start = time.time()
    # A timeout keeps a hung server from blocking a worker thread forever,
    # consistent with the timeout already used for the VRAM endpoint below.
    response = requests.post(API_URL, json=payload, timeout=30)
    response.raise_for_status()
    return (time.time() - start) * 1000


print(f"🔥 Stress Test Başlatılıyor: {TOTAL_REQUESTS} istek, {CONCURRENT_USERS} eşzamanlı kanal...")

# Warm-up: one untimed request so first-call initialization cost (model load,
# connection setup) does not skew the measured latencies. It runs before any
# worker is submitted, so it needs no executor; a timeout keeps a dead server
# from hanging the whole script here.
requests.post(API_URL, json=payload, timeout=30)

with ThreadPoolExecutor(max_workers=CONCURRENT_USERS) as executor:
    start_time = time.time()
    # executor.map preserves submission order; each worker yields its latency in ms.
    latencies = list(executor.map(send_request, range(TOTAL_REQUESTS)))
    total_duration = time.time() - start_time

avg_latency = sum(latencies) / len(latencies)
rps = TOTAL_REQUESTS / total_duration  # achieved throughput, requests/second

# Label the run so GPU and CPU results can be told apart in saved output.
if torch.cuda.is_available():
    runtime_label = f"GPU - {torch.cuda.get_device_name(0)} Üzerinde"
else:
    runtime_label = "CPU Üzerinde"

print("\n" + "=" * 40)
print(f"📊 SONUÇLAR ({runtime_label})")
print("-" * 40)
print(f"⏱️ Ortalama Gecikme: {avg_latency:.2f} ms")
print(f"🚀 Saniyedeki İstek (RPS): {rps:.2f} req/sec")
print(f"⌛ Toplam Süre: {total_duration:.2f} saniye")
print("=" * 40)

# Fetch the service's VRAM snapshot endpoint and report whatever it returns;
# a network/HTTP/JSON failure is reported as a warning instead of crashing.
print("\n🔎 VRAM Snapshot (/vram-status)")
try:
    snapshot = requests.get(VRAM_URL, timeout=10)
    snapshot.raise_for_status()
    info = snapshot.json()
except requests.RequestException as exc:
    print(f"⚠️ VRAM endpoint erişilemedi: {exc}")
else:
    if not info.get("cuda_available"):
        # Server reports no CUDA; show its message (or a default) and stop.
        print(f"ℹ️ {info.get('message', 'CUDA aktif değil.')}")
    else:
        print(f"📟 GPU: {info.get('gpu_name', 'Bilinmiyor')}")
        print(f"🔥 Allocated: {info.get('allocated_mb', 0)} MB")
        print(f"🛡️ Reserved: {info.get('reserved_mb', 0)} MB")
        print(f"🆓 Free (Tahmini): {info.get('free_estimate_mb', 0)} MB")