# Throughput benchmark: fires concurrent STT upload requests at a local
# server and reports wall time, per-request processing time, and speedup.
# (Cleaned of web-page chrome left over from a copy/paste.)
# Standard library
import asyncio
import statistics
import time
from pathlib import Path

# Third-party
import aiohttp

# Base URL of the local STT service under test.
BASE_URL = "http://127.0.0.1:8000"
# Sample audio payload uploaded by every concurrent request.
AUDIO_FILE = "test_audio.mp3"
async def transcribe_concurrent(n_requests=4):
    """Benchmark STT throughput by issuing concurrent upload requests.

    Sends ``n_requests`` simultaneous POSTs of ``AUDIO_FILE`` to the STT
    upload endpoint, then prints wall time, average server-side processing
    time, and an estimated parallelism factor (sum of per-request
    processing times divided by wall time).

    Args:
        n_requests: Number of concurrent upload requests to issue.

    Returns:
        None. All output is printed; returns early if AUDIO_FILE is missing.
    """
    print(f"\n🚀 Starting Throughput Test with {n_requests} concurrent STT requests...")

    # Ensure the audio fixture exists before opening any network session.
    if not Path(AUDIO_FILE).exists():
        print(f"❌ {AUDIO_FILE} not found. Run comprehensive benchmark first to generate it.")
        return

    # Read the payload once. The original opened one file handle per request
    # and never closed any of them (resource leak); sending the same bytes
    # object to every request avoids that entirely.
    audio_bytes = Path(AUDIO_FILE).read_bytes()

    async def _post_one(session):
        """POST one upload; return server-side processing time, or None on error."""
        data = aiohttp.FormData()
        data.add_field('file',
                       audio_bytes,
                       filename=AUDIO_FILE,
                       content_type='audio/mpeg')
        data.add_field('language', 'en')
        # async with guarantees the response/connection is released even on
        # error paths — the original never released its responses.
        async with session.post(f"{BASE_URL}/api/v1/stt/upload", data=data) as resp:
            if resp.status == 200:
                result = await resp.json()
                return result.get("processing_time", 0)
            print(f"⚠️ Error: {resp.status}")
            return None

    async with aiohttp.ClientSession() as session:
        start_time = time.time()
        print("📨 Requests sent. Waiting for responses...")
        results = await asyncio.gather(*(_post_one(session) for _ in range(n_requests)))
        total_time = time.time() - start_time

    # Keep only successful requests' timings.
    durations = [d for d in results if d is not None]

    print("\n📊 Throughput Results:")
    print(f"  Concurrent Requests: {n_requests}")
    print(f"  Total Wall Time: {total_time:.2f}s")
    print(f"  Avg Process Time: {statistics.mean(durations):.2f}s" if durations else "N/A")
    print(f"  Theoretical Seq: {sum(durations):.2f}s")

    # Parallelism factor: 1.0 = pure sequential; n_requests = perfect parallelism.
    if total_time > 0:
        speedup = sum(durations) / total_time
        # Fixed typo in the original output ("Parellelism" -> "Parallelism").
        print(f"  Parallelism Factor: {speedup:.2f}x (1.0 = Sequential)")
        if speedup < 1.5 and n_requests >= 4:
            print("\n💡 ANALYSIS: Throughput is bottlenecked! The system is processing requests sequentially.")
            print("  👉 Recommendation: Implement 'Batched Inference' to process multiple inputs simultaneously.")
        else:
            print("\n✅ ANALYSIS: Throughput is scaling well.")
def _main() -> None:
    """Script entry point: run the concurrency benchmark with defaults."""
    asyncio.run(transcribe_concurrent())


if __name__ == "__main__":
    _main()