"""
VoiceForge Memory Management Benchmark
Tests memory reduction capabilities
"""

import requests
import time

# Root URL of the server under test; every request in this script is built
# by appending an endpoint path to it.
BASE_URL = "http://127.0.0.1:8000"

def get_memory(timeout=10.0):
    """Fetch the server's current memory report.

    Issues ``GET {BASE_URL}/health/memory`` and returns the decoded JSON body
    when the server answers with HTTP 200.

    Args:
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the whole benchmark. Pass ``None`` to wait forever.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, times
        out, answers with a non-200 status, or the body is not valid JSON.
        The reason is printed in every failure case.
    """
    try:
        resp = requests.get(f"{BASE_URL}/health/memory", timeout=timeout)
        if resp.status_code == 200:
            return resp.json()
        # Surface the status instead of returning None without explanation.
        print(f"Error: unexpected status {resp.status_code} from /health/memory")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that cannot be decoded as JSON.
        print(f"Error: {e}")
    return None

def unload_all(timeout=60.0):
    """Ask the server to unload every loaded model.

    Issues ``POST {BASE_URL}/health/memory/unload-all`` and returns the
    decoded JSON body when the server answers with HTTP 200.

    Args:
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the whole benchmark. Pass ``None`` to wait forever.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, times
        out, answers with a non-200 status, or the body is not valid JSON.
        The reason is printed in every failure case.
    """
    try:
        resp = requests.post(
            f"{BASE_URL}/health/memory/unload-all",
            timeout=timeout,
        )
        if resp.status_code == 200:
            return resp.json()
        # Surface the status instead of returning None without explanation.
        print(
            f"Error: unexpected status {resp.status_code} "
            "from /health/memory/unload-all"
        )
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that cannot be decoded as JSON.
        print(f"Error: {e}")
    return None

def trigger_stt(timeout=300.0):
    """Upload ``test_audio.mp3`` to the STT endpoint so the server loads its model.

    Posts the file as multipart form data to
    ``{BASE_URL}/api/v1/stt/upload`` with ``language=en``.

    Args:
        timeout: Seconds to wait on the server before giving up. The default
            is generous on the assumption that the first request may include
            model loading time (not verifiable from this file). Pass ``None``
            to wait forever.

    Returns:
        ``True`` when the server answers with HTTP 200; ``False`` when the
        audio file cannot be opened, the request fails or times out, or the
        server answers with any other status. The reason is printed in every
        failure case.
    """
    try:
        with open("test_audio.mp3", "rb") as f:
            resp = requests.post(
                f"{BASE_URL}/api/v1/stt/upload",
                files={"file": ("test.mp3", f, "audio/mpeg")},
                data={"language": "en"},
                timeout=timeout,
            )
    except (OSError, requests.RequestException) as e:
        # OSError covers a missing or unreadable test_audio.mp3.
        print(f"STT Error: {e}")
        return False

    if resp.status_code != 200:
        # Report the rejection instead of failing silently.
        print(f"STT Error: unexpected status {resp.status_code}")
        return False
    return True

def main():
    """Run the memory benchmark and print a report to stdout.

    Steps:
        1. Print the server's initial memory state.
        2. Send an STT request, wait one second, and record memory again as
           the "models loaded" figure.
        3. Ask the server to unload all models and record the memory it
           reports afterwards.
        4. Print a summary table with the absolute and relative reduction.

    Any step whose request fails prints a warning; the summary is only
    produced when both the loaded and unloaded measurements were obtained.
    """
    # Reduction (in MB) above which the run is reported as a success.
    expected_reduction_mb = 500

    print("\n" + "="*60)
    print("🧠 VoiceForge Memory Management Benchmark")
    print("="*60)

    # 1. Check initial memory
    print("\n📊 1. Initial Memory State")
    print("-" * 40)
    mem = get_memory()
    if mem:
        print(f"   Memory: {mem['memory_mb']:.1f} MB")
        print(f"   Loaded Models: {mem['loaded_models']}")
    else:
        print("   ⚠️ Could not read initial memory state")

    # 2. Trigger STT to ensure models are loaded
    print("\n📊 2. Loading Models (via STT request)")
    print("-" * 40)
    if trigger_stt():
        print("   ✅ STT request completed")
    else:
        print("   ⚠️ STT request failed; models may not be loaded")

    time.sleep(1)  # Wait for model loading

    # Bound up front so the summary can test for a missing measurement
    # explicitly instead of relying on which branches happened to run.
    loaded_memory = None
    mem = get_memory()
    if mem:
        print(f"   Memory After Load: {mem['memory_mb']:.1f} MB")
        print(f"   Loaded Models: {mem['loaded_models']}")
        loaded_memory = mem['memory_mb']
    else:
        print("   ⚠️ Could not read memory after load")

    # 3. Unload all models
    print("\n📊 3. Unloading All Models")
    print("-" * 40)
    unloaded_memory = None
    result = unload_all()
    if result:
        print(f"   Unloaded: {result['unloaded_models']}")
        print(f"   Memory Before: {result['memory_before_mb']:.1f} MB")
        print(f"   Memory After: {result['memory_after_mb']:.1f} MB")
        print(f"   Freed: {result['freed_mb']:.1f} MB")
        unloaded_memory = result['memory_after_mb']
    else:
        print("   ⚠️ Unload request failed")

    # 4. Summary
    print("\n" + "="*60)
    print("📈 MEMORY BENCHMARK SUMMARY")
    print("="*60)

    if loaded_memory is not None and unloaded_memory is not None:
        reduction = loaded_memory - unloaded_memory
        # Guard against division by zero if the server reports 0 MB.
        reduction_pct = (reduction / loaded_memory) * 100 if loaded_memory > 0 else 0

        print("\n| Metric | Value |")
        print("|--------|-------|")
        print(f"| Memory (Models Loaded) | {loaded_memory:.1f} MB |")
        print(f"| Memory (Models Unloaded) | {unloaded_memory:.1f} MB |")
        print(f"| Memory Reduction | {reduction:.1f} MB ({reduction_pct:.0f}%) |")

        if reduction > expected_reduction_mb:
            print(f"\n✅ SUCCESS: Memory reduction of {reduction:.0f} MB achieved!")
        else:
            print(f"\n⚠️ Memory reduction lower than expected ({reduction:.0f} MB)")
    else:
        print("\n⚠️ Summary unavailable: memory measurements are missing")

    print("\n" + "="*60)
    print("🏁 Benchmark Complete")
    print("="*60)

# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()