# voiceforge/backend/tests/performance/benchmark_throughput.py
# lordofgaming
# Initial VoiceForge deployment (clean)
# 673435a
import asyncio
import time
import aiohttp
import statistics
from pathlib import Path
# Root URL of the API server under test; the STT upload endpoint path is appended to it.
BASE_URL = "http://127.0.0.1:8000"
# Audio sample uploaded by every request. The path is relative to the current
# working directory, and the benchmark aborts early if the file does not exist.
AUDIO_FILE = "test_audio.mp3"
async def _post_audio(session, url, audio_bytes):
    """Upload one audio payload and return the server-reported processing time.

    Args:
        session: Open ``aiohttp.ClientSession`` used to send the request.
        url: Full URL of the STT upload endpoint.
        audio_bytes: Raw contents of the audio file to upload.

    Returns:
        The ``processing_time`` value from the JSON response (``0`` when the
        key is absent), or ``None`` if the request failed or returned a
        non-200 status.
    """
    form = aiohttp.FormData()
    form.add_field('file',
                   audio_bytes,
                   filename=AUDIO_FILE,
                   content_type='audio/mpeg')
    form.add_field('language', 'en')
    try:
        # The context manager releases the connection even on error paths.
        async with session.post(url, data=form) as resp:
            if resp.status != 200:
                print(f"⚠️ Error: {resp.status}")
                return None
            result = await resp.json()
            return result.get("processing_time", 0)
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as exc:
        # One failed request must not abort the whole benchmark run.
        print(f"⚠️ Error: {exc}")
        return None


async def transcribe_concurrent(n_requests=4):
    """Send ``n_requests`` concurrent STT uploads and print a throughput report.

    The same audio file is posted ``n_requests`` times at once. The sum of the
    per-request ``processing_time`` values reported by the server is compared
    with the wall-clock time of the whole batch to produce a "parallelism
    factor" (1.0 = requests were effectively handled one after another).

    Args:
        n_requests: Number of simultaneous upload requests to issue.

    Returns:
        None. All results are printed to stdout. If the audio file is missing
        the function prints an error and returns without sending anything.
    """
    print(f"\n🚀 Starting Throughput Test with {n_requests} concurrent STT requests...")

    # Ensure audio exists
    audio_path = Path(AUDIO_FILE)
    if not audio_path.exists():
        print(f"❌ {AUDIO_FILE} not found. Run comprehensive benchmark first to generate it.")
        return

    # Read the file once: every request sends identical bytes, and no file
    # handle is left open per request.
    audio_bytes = audio_path.read_bytes()
    upload_url = f"{BASE_URL}/api/v1/stt/upload"

    async with aiohttp.ClientSession() as session:
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments.
        start_time = time.perf_counter()
        pending = [
            _post_audio(session, upload_url, audio_bytes)
            for _ in range(n_requests)
        ]
        print("📨 Requests sent. Waiting for responses...")
        outcomes = await asyncio.gather(*pending)
        total_time = time.perf_counter() - start_time

    durations = [d for d in outcomes if d is not None]
    failed = len(outcomes) - len(durations)

    print("\n📊 Throughput Results:")
    print(f" Concurrent Requests: {n_requests}")
    print(f" Total Wall Time: {total_time:.2f}s")
    if durations:
        print(f" Avg Process Time: {statistics.mean(durations):.2f}s")
    else:
        print(" Avg Process Time: N/A")
    print(f" Theoretical Seq: {sum(durations):.2f}s")

    if failed:
        # Missing durations would deflate the factor and produce a false
        # "bottlenecked" verdict, so do not draw a conclusion.
        print(f"\n⚠️ ANALYSIS skipped: {failed} of {len(outcomes)} requests failed, "
              "so the parallelism factor would not be representative.")
        return

    # Parallelism Factor: How much faster than sequential?
    # 1.0 = Pure Sequential. n_requests = Perfect Parallelism.
    if total_time > 0 and durations:
        speedup = sum(durations) / total_time
        print(f" Parallelism Factor: {speedup:.2f}x (1.0 = Sequential)")
        if speedup < 1.5 and n_requests >= 4:
            print("\n💡 ANALYSIS: Throughput is bottlenecked! The system is processing requests sequentially.")
            print(" 👉 Recommendation: Implement 'Batched Inference' to process multiple inputs simultaneously.")
        else:
            print("\n✅ ANALYSIS: Throughput is scaling well.")
if __name__ == "__main__":
    # Script entry point: run a single benchmark pass with the default
    # number of concurrent requests.
    benchmark_run = transcribe_concurrent()
    asyncio.run(benchmark_run)