Spaces:

lordofgaming
/

voiceforge

Sleeping

voiceforge/backend/tests/performance/benchmark_memory.py

lordofgaming

Initial VoiceForge deployment (clean)

673435a about 1 month ago

3.31 kB

	"""
	VoiceForge Memory Management Benchmark
	Tests memory reduction capabilities
	"""

	import requests
	import time

	# Base URL that every request in this script is sent to (loopback, port 8000).
	BASE_URL = "http://127.0.0.1:8000"

	def get_memory(timeout=10.0):
	    """Fetch the server's current memory report.

	    Sends ``GET {BASE_URL}/health/memory``.

	    Args:
	        timeout: Seconds to wait for the server before giving up, so a
	            stalled backend cannot hang the benchmark indefinitely.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200
	        status, network failure, timeout, or an undecodable body).
	    """
	    try:
	        resp = requests.get(f"{BASE_URL}/health/memory", timeout=timeout)
	        if resp.status_code == 200:
	            return resp.json()
	        # Report the failure instead of returning None without explanation.
	        print(f"Error: unexpected status {resp.status_code} from /health/memory")
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers JSON decode failures on older requests releases.
	        print(f"Error: {e}")
	    return None

	def unload_all(timeout=30.0):
	    """Ask the server to unload all models.

	    Sends ``POST {BASE_URL}/health/memory/unload-all``.

	    Args:
	        timeout: Seconds to wait for the server before giving up, so a
	            stalled backend cannot hang the benchmark indefinitely.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200
	        status, network failure, timeout, or an undecodable body).
	    """
	    try:
	        resp = requests.post(
	            f"{BASE_URL}/health/memory/unload-all", timeout=timeout
	        )
	        if resp.status_code == 200:
	            return resp.json()
	        # Report the failure instead of returning None without explanation.
	        print(
	            f"Error: unexpected status {resp.status_code} "
	            "from /health/memory/unload-all"
	        )
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers JSON decode failures on older requests releases.
	        print(f"Error: {e}")
	    return None

	def trigger_stt(audio_path="test_audio.mp3", timeout=300.0):
	    """Upload an audio file to the STT endpoint so the server loads its model.

	    Sends ``POST {BASE_URL}/api/v1/stt/upload`` with the file as multipart
	    form data and ``language=en``.

	    Args:
	        audio_path: Path of the audio file to upload. The default is
	            resolved relative to the current working directory.
	        timeout: Seconds to wait for the server. Generous by default
	            because this request is what triggers model loading.

	    Returns:
	        ``True`` if the server answered HTTP 200, ``False`` otherwise
	        (unreadable file, network failure, timeout, or non-200 status).
	    """
	    try:
	        # The file handle must stay open while requests streams the upload.
	        with open(audio_path, "rb") as f:
	            resp = requests.post(
	                f"{BASE_URL}/api/v1/stt/upload",
	                files={"file": ("test.mp3", f, "audio/mpeg")},
	                data={"language": "en"},
	                timeout=timeout,
	            )
	    except (OSError, requests.RequestException) as e:
	        print(f"STT Error: {e}")
	        return False

	    if resp.status_code != 200:
	        # Report the failure instead of returning False without explanation.
	        print(f"STT Error: unexpected status {resp.status_code}")
	        return False
	    return True

	def main(min_reduction_mb=500.0):
	    """Run the memory benchmark against the server and print a report.

	    Steps: print the initial memory reading, send one STT request so the
	    models get loaded, print the memory reading again, unload all models,
	    then summarise the difference between the loaded reading and the
	    post-unload reading. A step that fails is reported and skipped rather
	    than aborting the run.

	    Args:
	        min_reduction_mb: The run is reported as a success when the memory
	            reduction (in MB) is strictly greater than this value.
	    """
	    banner = "=" * 60
	    rule = "-" * 40

	    print("\n" + banner)
	    print("🧠 VoiceForge Memory Management Benchmark")
	    print(banner)

	    # 1. Check initial memory
	    print("\n📊 1. Initial Memory State")
	    print(rule)
	    mem = get_memory()
	    if mem:
	        print(f" Memory: {mem['memory_mb']:.1f} MB")
	        print(f" Loaded Models: {mem['loaded_models']}")
	    else:
	        print(" ⚠️ Could not read initial memory state")

	    # 2. Trigger STT to ensure models are loaded
	    print("\n📊 2. Loading Models (via STT request)")
	    print(rule)
	    if trigger_stt():
	        print(" ✅ STT request completed")
	    else:
	        print(" ⚠️ STT request failed; models may not have been loaded")

	    time.sleep(1)  # Wait for model loading

	    mem = get_memory()
	    if mem:
	        print(f" Memory After Load: {mem['memory_mb']:.1f} MB")
	        print(f" Loaded Models: {mem['loaded_models']}")
	    else:
	        print(" ⚠️ Could not read memory after load")

	    # 3. Unload all models
	    print("\n📊 3. Unloading All Models")
	    print(rule)
	    result = unload_all()
	    if result:
	        print(f" Unloaded: {result['unloaded_models']}")
	        print(f" Memory Before: {result['memory_before_mb']:.1f} MB")
	        print(f" Memory After: {result['memory_after_mb']:.1f} MB")
	        print(f" Freed: {result['freed_mb']:.1f} MB")
	    else:
	        print(" ⚠️ Unload request failed")

	    # 4. Summary
	    print("\n" + banner)
	    print("📈 MEMORY BENCHMARK SUMMARY")
	    print(banner)

	    if mem and result:
	        # Both readings are bound here, next to their only use, so the
	        # summary can never touch an unassigned name.
	        loaded_memory = mem['memory_mb']
	        unloaded_memory = result['memory_after_mb']
	        reduction = loaded_memory - unloaded_memory
	        # Guard the percentage against a zero (or negative) baseline.
	        reduction_pct = (reduction / loaded_memory) * 100 if loaded_memory > 0 else 0

	        # Plain pipes: "\|" is an invalid escape and would print backslashes.
	        print("\n| Metric | Value |")
	        print("|--------|-------|")
	        print(f"| Memory (Models Loaded) | {loaded_memory:.1f} MB |")
	        print(f"| Memory (Models Unloaded) | {unloaded_memory:.1f} MB |")
	        print(f"| Memory Reduction | {reduction:.1f} MB ({reduction_pct:.0f}%) |")

	        if reduction > min_reduction_mb:
	            print(f"\n✅ SUCCESS: Memory reduction of {reduction:.0f} MB achieved!")
	        else:
	            print(f"\n⚠️ Memory reduction lower than expected ({reduction:.0f} MB)")
	    else:
	        print("\n⚠️ Summary unavailable: a memory reading or the unload result is missing")

	    print("\n" + banner)
	    print("🏁 Benchmark Complete")
	    print(banner)

	if __name__ == "__main__":
	    # Run the benchmark only when executed as a script, not on import.
	    main()