#!/usr/bin/env python3
"""
Test script for multi-model support
Tests model switching and generation with CodeGen and Code-Llama
"""
import requests
import time
import sys
import json

BASE_URL = "http://localhost:8000"


def print_header(text):
    """Print a formatted header"""
    print("\n" + "=" * 60)
    print(f" {text}")
    print("=" * 60)


def print_result(success, message):
    """Print test result"""
    status = "✅ PASS" if success else "❌ FAIL"
    print(f"{status}: {message}")
    return success


def test_health_check():
    """Test if backend is running"""
    print_header("1. Health Check")
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=5)
        data = response.json()
        print(f"Status: {data.get('status')}")
        print(f"Model loaded: {data.get('model_loaded')}")
        print(f"Device: {data.get('device')}")
        return print_result(response.status_code == 200, "Backend is running")
    except requests.exceptions.ConnectionError:
        return print_result(False, "Cannot connect to backend. Is it running?")
    except Exception as e:
        return print_result(False, f"Health check failed: {e}")


def test_list_models():
    """Test listing available models"""
    print_header("2. List Available Models")
    try:
        response = requests.get(f"{BASE_URL}/models", timeout=5)
        data = response.json()
        models = data.get('models', [])
        print(f"Found {len(models)} models:")
        for model in models:
            status = "✓" if model['available'] else "✗"
            current = " (CURRENT)" if model['is_current'] else ""
            print(f"  {status} {model['name']} ({model['size']}) - {model['architecture']}{current}")
        return print_result(len(models) >= 2, f"Found {len(models)} models")
    except Exception as e:
        return print_result(False, f"List models failed: {e}")


def test_current_model():
    """Test getting current model info"""
    print_header("3. Get Current Model Info")
    try:
        response = requests.get(f"{BASE_URL}/models/current", timeout=5)
        data = response.json()
        print(f"Current model: {data.get('name')}")
        print(f"Model ID: {data.get('id')}")
        config = data.get('config', {})
        print(f"Layers: {config.get('num_layers')}")
        print(f"Heads: {config.get('num_heads')}")
        print(f"Attention: {config.get('attention_type')}")
        return print_result(response.status_code == 200, "Got current model info")
    except Exception as e:
        return print_result(False, f"Get current model failed: {e}")


def test_generation(model_name, prompt="def fibonacci(n):\n ", max_tokens=30):
    """Test text generation"""
    print_header(f"4. Test Generation with {model_name}")
    print(f"Prompt: {repr(prompt)}")
    print(f"Generating {max_tokens} tokens...")
    try:
        response = requests.post(
            f"{BASE_URL}/generate",
            json={
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": 0.7,
                "extract_traces": False  # Faster for testing
            },
            timeout=60  # Generation can take a while
        )
        if response.status_code != 200:
            return print_result(False, f"Generation failed: {response.status_code}")
        data = response.json()
        generated = data.get('generated_text', '')
        tokens = data.get('tokens', [])
        print(f"\nGenerated text:")
        print("-" * 60)
        print(generated)
        print("-" * 60)
        print(f"Token count: {len(tokens)}")
        print(f"Confidence: {data.get('confidence', 0):.3f}")
        print(f"Perplexity: {data.get('perplexity', 0):.3f}")
        return print_result(len(tokens) > 0, f"Generated {len(tokens)} tokens")
    except Exception as e:
        return print_result(False, f"Generation failed: {e}")


def test_model_switch(model_id, model_name):
    """Test switching to a different model"""
    print_header(f"5. Switch to {model_name}")
    print(f"Switching to model: {model_id}")
    print("⏳ This may take a while (downloading + loading model)...")
    try:
        response = requests.post(
            f"{BASE_URL}/models/switch",
            json={"model_id": model_id},
            timeout=300  # 5 minutes for download + loading
        )
        if response.status_code != 200:
            return print_result(False, f"Switch failed: {response.status_code}")
        data = response.json()
        print(f"Message: {data.get('message')}")
        # Verify switch by getting current model
        verify_response = requests.get(f"{BASE_URL}/models/current", timeout=5)
        verify_data = verify_response.json()
        current_id = verify_data.get('id')
        success = current_id == model_id
        return print_result(success, f"Switched to {model_name}" if success else "Switch verification failed")
    except requests.exceptions.Timeout:
        return print_result(False, "Switch timeout - model download may be in progress")
    except Exception as e:
        return print_result(False, f"Switch failed: {e}")


def test_model_info():
    """Test detailed model info endpoint"""
    print_header("6. Get Detailed Model Info")
    try:
        response = requests.get(f"{BASE_URL}/model/info", timeout=5)
        data = response.json()
        print(f"Model: {data.get('name')}")
        print(f"Architecture: {data.get('architecture')}")
        print(f"Parameters: {data.get('totalParams'):,}")
        print(f"Layers: {data.get('layers')}")
        print(f"Heads: {data.get('heads')}")
        if data.get('kv_heads'):
            print(f"KV Heads: {data.get('kv_heads')} (GQA)")
        print(f"Attention type: {data.get('attention_type')}")
        print(f"Vocab size: {data.get('vocabSize'):,}")
        print(f"Context length: {data.get('maxPositions'):,}")
        return print_result(response.status_code == 200, "Got detailed model info")
    except Exception as e:
        return print_result(False, f"Get model info failed: {e}")


def main():
    """Run all tests"""
    print("\n🧪 Multi-Model Support Test Suite")
    print("This will test model switching between CodeGen 350M and Code-Llama 7B")
    print("\nIMPORTANT: Make sure the backend is running:")
    print("  cd /Users/garyboon/Development/VisualisableAI/visualisable-ai-backend")
    print("  python -m uvicorn backend.model_service:app --reload --port 8000")
    input("\nPress Enter to start tests...")

    results = []

    # Test 1: Health check
    results.append(test_health_check())
    if not results[-1]:
        print("\n❌ Backend not running. Exiting.")
        sys.exit(1)
    time.sleep(1)

    # Test 2: List models
    results.append(test_list_models())
    time.sleep(1)

    # Test 3: Current model (should be CodeGen)
    results.append(test_current_model())
    time.sleep(1)

    # Test 4: Get detailed model info
    results.append(test_model_info())
    time.sleep(1)

    # Test 5: Generate with CodeGen
    results.append(test_generation("CodeGen 350M"))
    time.sleep(2)

    # Test 6: Switch to Code-Llama
    print("\n⚠️  WARNING: Next test will download Code-Llama 7B (~14GB)")
    print("This may take 5-10 minutes depending on your internet connection.")
    proceed = input("Proceed with Code-Llama test? (y/n): ").lower()

    if proceed == 'y':
        results.append(test_model_switch("code-llama-7b", "Code-Llama 7B"))
        if results[-1]:
            time.sleep(2)

            # Test 7: Get model info for Code-Llama
            results.append(test_model_info())
            time.sleep(1)

            # Test 8: Generate with Code-Llama
            results.append(test_generation("Code-Llama 7B"))
            time.sleep(2)

            # Test 9: Switch back to CodeGen
            results.append(test_model_switch("codegen-350m", "CodeGen 350M"))
            if results[-1]:
                time.sleep(2)

                # Test 10: Verify CodeGen still works
                results.append(test_generation("CodeGen 350M (after switch back)"))
    else:
        print("\nSkipping Code-Llama tests.")

    # Summary
    print_header("Test Summary")
    passed = sum(results)
    total = len(results)
    print(f"Passed: {passed}/{total} tests")

    if passed == total:
        print("\n🎉 All tests passed! Multi-model support is working correctly.")
        return 0
    else:
        print(f"\n⚠️  {total - passed} test(s) failed. Check output above for details.")
        return 1


if __name__ == "__main__":
    sys.exit(main())