# NOTE(review): the following banner is Hugging Face Spaces UI residue captured
# with the paste ("Spaces: Sleeping") — kept as a comment so the file parses.
import requests
import time
from concurrent.futures import ThreadPoolExecutor
import torch

# Endpoints of the locally running analysis service under test.
API_URL = "http://127.0.0.1:8000/analyze"
VRAM_URL = "http://127.0.0.1:8000/vram-status"

TOTAL_REQUESTS = 50  # Total number of requests to send
CONCURRENT_USERS = 5  # Number of users attacking concurrently

# Request body sent to /analyze on every call (Turkish sample text;
# "platform_dil" presumably selects the analysis language — TODO confirm
# against the API schema).
payload = {
    "text": "Bu bir performans testidir. Sistemimiz hem Türkçe hem İngilizce içerikleri başarıyla analiz edebiliyor.",
    "platform_dil": "tr",
}
def send_request(_):
    """POST the test payload to the analyze endpoint and return latency in ms.

    The single argument is ignored; it only exists so the function can be
    fanned out with ``executor.map(send_request, range(N))``.

    Returns:
        float: wall-clock round-trip time of the request in milliseconds.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the request exceeds the timeout below.
    """
    start = time.time()
    # Fix: requests has NO default timeout — without one, a stalled server
    # hangs the worker thread (and thus the whole stress test) forever.
    response = requests.post(API_URL, json=payload, timeout=60)
    response.raise_for_status()
    return (time.time() - start) * 1000
print(f"🔥 Stress Test Başlatılıyor: {TOTAL_REQUESTS} istek, {CONCURRENT_USERS} eşzamanlı kanal...")

with ThreadPoolExecutor(max_workers=CONCURRENT_USERS) as executor:
    # Warm-up request: absorbs cold-start cost (model load, caches) so it
    # does not skew the measured latencies; intentionally excluded from
    # the results. Best-effort — a failure here is tolerated, but a
    # timeout is set so a dead server cannot hang the script (fix: the
    # original call had no timeout and could block forever).
    try:
        requests.post(API_URL, json=payload, timeout=60)
    except requests.RequestException:
        print("⚠️ Warm-up request failed; continuing with the timed run.")

    # Time only the measured burst of TOTAL_REQUESTS calls.
    start_time = time.time()
    latencies = list(executor.map(send_request, range(TOTAL_REQUESTS)))
    total_duration = time.time() - start_time

# Aggregate metrics for the report below.
avg_latency = sum(latencies) / len(latencies)
rps = TOTAL_REQUESTS / total_duration
# Label the runtime so results from different machines are comparable.
runtime_label = (
    f"GPU - {torch.cuda.get_device_name(0)} Üzerinde"
    if torch.cuda.is_available()
    else "CPU Üzerinde"
)

# Print the summary banner with the aggregated metrics.
banner = "=" * 40
print("\n" + banner)
print(f"📊 SONUÇLAR ({runtime_label})")
print("-" * 40)
print(f"⏱️ Ortalama Gecikme: {avg_latency:.2f} ms")
print(f"🚀 Saniyedeki İstek (RPS): {rps:.2f} req/sec")
print(f"⌛ Toplam Süre: {total_duration:.2f} saniye")
print(banner)
print("\n🔎 VRAM Snapshot (/vram-status)")

# Query the service's VRAM status endpoint; any transport/HTTP/JSON failure
# is reported instead of crashing the script after a successful test run.
try:
    vram_resp = requests.get(VRAM_URL, timeout=10)
    vram_resp.raise_for_status()
    vram = vram_resp.json()
except requests.RequestException as exc:
    print(f"⚠️ VRAM endpoint erişilemedi: {exc}")
else:
    if not vram.get("cuda_available"):
        # Server reports no CUDA; show its message (or a fallback).
        print(f"ℹ️ {vram.get('message', 'CUDA aktif değil.')}")
    else:
        print(f"📟 GPU: {vram.get('gpu_name', 'Bilinmiyor')}")
        print(f"🔥 Allocated: {vram.get('allocated_mb', 0)} MB")
        print(f"🛡️ Reserved: {vram.get('reserved_mb', 0)} MB")
        print(f"🆓 Free (Tahmini): {vram.get('free_estimate_mb', 0)} MB")