"""
VoiceForge Memory Management Benchmark
Tests memory reduction capabilities
"""

import time

import requests

BASE_URL = "http://127.0.0.1:8000"

# Seconds to wait for the lightweight /health/memory endpoints. Without an
# explicit timeout, requests waits forever on a stalled server.
REQUEST_TIMEOUT = 30

# Seconds to wait for the STT upload; kept generous because the request is
# used here to make the server load its models.
STT_TIMEOUT = 300

# Memory reduction (in MB) above which the benchmark reports success.
EXPECTED_REDUCTION_MB = 500


def _fetch_json(send, path):
    """Call ``send`` (``requests.get`` or ``requests.post``) on BASE_URL + path.

    Returns the decoded JSON body when the response status is 200.
    Returns None for any other status, or when the request or the JSON
    decoding raises (the error is printed, not propagated).
    """
    # Deliberately best-effort: this is a benchmark script, so any failure
    # (connection error, timeout, invalid JSON) is reported and skipped.
    try:
        resp = send(f"{BASE_URL}{path}", timeout=REQUEST_TIMEOUT)
        if resp.status_code == 200:
            return resp.json()
    except Exception as e:
        print(f"Error: {e}")
    return None


def get_memory():
    """Get current server memory usage.

    Returns the JSON body of GET /health/memory, or None on failure.
    """
    return _fetch_json(requests.get, "/health/memory")


def unload_all():
    """Unload all models.

    Returns the JSON body of POST /health/memory/unload-all, or None on
    failure.
    """
    return _fetch_json(requests.post, "/health/memory/unload-all")


def trigger_stt():
    """Trigger STT to load model.

    Uploads ``test_audio.mp3`` from the current working directory to the
    STT upload endpoint.

    Returns True when the server answers with status 200, False otherwise
    (including when the audio file cannot be opened or the request fails).
    """
    try:
        with open("test_audio.mp3", "rb") as f:
            resp = requests.post(
                f"{BASE_URL}/api/v1/stt/upload",
                files={"file": ("test.mp3", f, "audio/mpeg")},
                data={"language": "en"},
                timeout=STT_TIMEOUT,
            )
        return resp.status_code == 200
    except Exception as e:
        print(f"STT Error: {e}")
        return False


def main():
    """Run the benchmark: measure, load models, unload them, and summarise."""
    banner = "=" * 60
    rule = "-" * 40

    print("\n" + banner)
    print("🧠 VoiceForge Memory Management Benchmark")
    print(banner)

    # 1. Check initial memory
    print("\n📊 1. Initial Memory State")
    print(rule)
    mem = get_memory()
    if mem:
        print(f" Memory: {mem['memory_mb']:.1f} MB")
        print(f" Loaded Models: {mem['loaded_models']}")

    # 2. Trigger STT to ensure models are loaded
    print("\n📊 2. Loading Models (via STT request)")
    print(rule)
    if trigger_stt():
        print(" ✅ STT request completed")

    time.sleep(1)  # Wait for model loading

    # Stays None when the measurement fails, so the summary can be skipped.
    loaded_memory = None
    mem = get_memory()
    if mem:
        print(f" Memory After Load: {mem['memory_mb']:.1f} MB")
        print(f" Loaded Models: {mem['loaded_models']}")
        loaded_memory = mem['memory_mb']

    # 3. Unload all models
    print("\n📊 3. Unloading All Models")
    print(rule)
    unloaded_memory = None
    result = unload_all()
    if result:
        print(f" Unloaded: {result['unloaded_models']}")
        print(f" Memory Before: {result['memory_before_mb']:.1f} MB")
        print(f" Memory After: {result['memory_after_mb']:.1f} MB")
        print(f" Freed: {result['freed_mb']:.1f} MB")
        unloaded_memory = result['memory_after_mb']

    # 4. Summary
    print("\n" + banner)
    print("📈 MEMORY BENCHMARK SUMMARY")
    print(banner)

    # Only summarise when both measurements were actually obtained.
    if loaded_memory is not None and unloaded_memory is not None:
        reduction = loaded_memory - unloaded_memory
        # Guard the percentage against a zero (or negative) baseline.
        reduction_pct = (reduction / loaded_memory) * 100 if loaded_memory > 0 else 0

        print("\n| Metric | Value |")
        print("|--------|-------|")
        print(f"| Memory (Models Loaded) | {loaded_memory:.1f} MB |")
        print(f"| Memory (Models Unloaded) | {unloaded_memory:.1f} MB |")
        print(f"| Memory Reduction | {reduction:.1f} MB ({reduction_pct:.0f}%) |")

        if reduction > EXPECTED_REDUCTION_MB:
            print(f"\n✅ SUCCESS: Memory reduction of {reduction:.0f} MB achieved!")
        else:
            print(f"\n⚠️ Memory reduction lower than expected ({reduction:.0f} MB)")

    print("\n" + banner)
    print("🏁 Benchmark Complete")
    print(banner)


if __name__ == "__main__":
    main()