# Throughput benchmark: fires concurrent STT upload requests at a local
# server and reports wall time, per-request processing time, and speedup.
# (Cleaned of web-page chrome left over from a copy/paste.)
# Standard library
import asyncio
import statistics
import time
from pathlib import Path

# Third-party
import aiohttp

# Base URL of the local STT service under test.
BASE_URL = "http://127.0.0.1:8000"
# Sample audio payload uploaded by every concurrent request.
AUDIO_FILE = "test_audio.mp3"
async def transcribe_concurrent(n_requests=4):
    """Benchmark STT throughput by issuing concurrent upload requests.

    Sends ``n_requests`` simultaneous POSTs of ``AUDIO_FILE`` to the STT
    upload endpoint, then prints wall time, average server-side processing
    time, and an estimated parallelism factor (sum of per-request
    processing times divided by wall time).

    Args:
        n_requests: Number of concurrent upload requests to issue.

    Returns:
        None. All output is printed; returns early if AUDIO_FILE is missing.
    """
    print(f"\n🚀 Starting Throughput Test with {n_requests} concurrent STT requests...")

    # Ensure the audio fixture exists before opening any network session.
    if not Path(AUDIO_FILE).exists():
        print(f"❌ {AUDIO_FILE} not found. Run comprehensive benchmark first to generate it.")
        return

    # Read the payload once. The original opened one file handle per request
    # and never closed any of them (resource leak); sending the same bytes
    # object to every request avoids that entirely.
    audio_bytes = Path(AUDIO_FILE).read_bytes()

    async def _post_one(session):
        """POST one upload; return server-side processing time, or None on error."""
        data = aiohttp.FormData()
        data.add_field('file',
                       audio_bytes,
                       filename=AUDIO_FILE,
                       content_type='audio/mpeg')
        data.add_field('language', 'en')
        # async with guarantees the response/connection is released even on
        # error paths — the original never released its responses.
        async with session.post(f"{BASE_URL}/api/v1/stt/upload", data=data) as resp:
            if resp.status == 200:
                result = await resp.json()
                return result.get("processing_time", 0)
            print(f"⚠️ Error: {resp.status}")
            return None

    async with aiohttp.ClientSession() as session:
        start_time = time.time()
        print("📨 Requests sent. Waiting for responses...")
        results = await asyncio.gather(*(_post_one(session) for _ in range(n_requests)))
        total_time = time.time() - start_time

    # Keep only successful requests' timings.
    durations = [d for d in results if d is not None]

    print("\n📊 Throughput Results:")
    print(f"  Concurrent Requests: {n_requests}")
    print(f"  Total Wall Time: {total_time:.2f}s")
    print(f"  Avg Process Time: {statistics.mean(durations):.2f}s" if durations else "N/A")
    print(f"  Theoretical Seq: {sum(durations):.2f}s")

    # Parallelism factor: 1.0 = pure sequential; n_requests = perfect parallelism.
    if total_time > 0:
        speedup = sum(durations) / total_time
        # Fixed typo in the original output ("Parellelism" -> "Parallelism").
        print(f"  Parallelism Factor: {speedup:.2f}x (1.0 = Sequential)")
        if speedup < 1.5 and n_requests >= 4:
            print("\n💡 ANALYSIS: Throughput is bottlenecked! The system is processing requests sequentially.")
            print("  👉 Recommendation: Implement 'Batched Inference' to process multiple inputs simultaneously.")
        else:
            print("\n✅ ANALYSIS: Throughput is scaling well.")
def _main() -> None:
    """Script entry point: run the concurrency benchmark with defaults."""
    asyncio.run(transcribe_concurrent())


if __name__ == "__main__":
    _main()