| |
| """ |
| ENHANCED FALLBACK SYSTEM TEST: Verify multiple models with fallback support. |
| ROBUST PRODUCTION SYSTEM TESTING. |
| """ |
|
|
| import os |
| import sys |
| import asyncio |
| import base64 |
| import time |
| from io import BytesIO |
| from PIL import Image, ImageDraw |
| import logging |
| from typing import Dict, Any, List |
|
|
| |
# Put the ``ai`` directory that sits next to this file at the front of the
# module search path.
ai_dir = os.path.join(os.path.dirname(__file__), "ai")
sys.path.insert(0, ai_dir)

# Root logging configuration for the script: INFO level, timestamped records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
|
class EnhancedFallbackTester:
    """Drive end-to-end checks of the enhanced model fallback system.

    Each ``test_*`` coroutine exercises one component, prints progress to
    stdout and returns a plain result dict.  The component modules are
    imported lazily inside each test so that a missing or broken import is
    reported as a failed test instead of aborting the whole run.

    Attributes:
        test_results: Per-test result dicts from the most recent call to
            ``run_comprehensive_fallback_test`` (empty until then).
    """

    def __init__(self):
        self.test_results = {}

    def create_test_image(self) -> str:
        """Create test image for multimodal testing.

        Draws a 224x224 white canvas with a few solid shapes (a green band
        along the bottom, a brown box with a red triangle on top of it and a
        yellow ellipse) and encodes it as PNG.

        Returns:
            The PNG bytes as a base64-encoded UTF-8 string.
        """
        print("🎨 Creating test image...")

        img = Image.new('RGB', (224, 224), color='white')
        draw = ImageDraw.Draw(img)

        draw.rectangle([0, 150, 224, 224], fill='lightgreen')
        draw.rectangle([50, 100, 100, 150], fill='brown')
        draw.polygon([30, 100, 75, 60, 120, 100], fill='red')
        draw.ellipse([160, 80, 190, 110], fill='yellow')

        buffer = BytesIO()
        img.save(buffer, format='PNG')
        img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        print("✅ Test image created")
        return img_base64

    async def test_model_loader_fallbacks(self) -> Dict[str, Any]:
        """Test model loader with fallback support.

        For each of the four known tasks, lists the models the loader
        reports as available, then asks the loader to load one with fallback
        and records which model was loaded and how long it took.

        Returns:
            On a completed run: ``task_results`` (per-task dicts with a
            ``success`` flag), ``system_status`` (as returned by the loader)
            and ``overall_success`` (true only if every task loaded a model).
            If the loader cannot be imported or created:
            ``{"success": False, "error": <message>}``.
        """
        print("\n🔧 TESTING MODEL LOADER FALLBACKS")
        print("=" * 60)

        try:
            from ai.multimodal.enhanced_model_loader import get_enhanced_model_loader

            loader = get_enhanced_model_loader(device="cpu")

            task_results = {}

            for task in ["image_captioning", "vqa", "multimodal_chat", "text_classification"]:
                print(f"\n📋 Testing task: {task}")

                # A failure in one task is recorded and must not stop the
                # remaining tasks from being exercised.
                try:
                    available_models = loader.get_available_models_for_task(task)
                    print(f" 📦 Available models: {len(available_models)}")

                    for candidate in available_models:
                        status = "✅ LOADED" if candidate["loaded"] else "⏳ NOT LOADED"
                        print(f" {status} {candidate['name']} (priority: {candidate['priority']})")

                    # perf_counter is monotonic, so the measured duration
                    # cannot be distorted by wall-clock adjustments.
                    start_time = time.perf_counter()
                    model_info = loader.load_model_with_fallback(task)
                    load_time = time.perf_counter() - start_time

                    if model_info:
                        print(f" ✅ Successfully loaded: {model_info['model_name']}")
                        print(f" 📊 Parameters: {model_info['parameters']:,}")
                        print(f" ⏱️ Load time: {load_time:.2f}s")
                        print(f" 💾 Memory: {model_info['memory_gb']}GB")

                        task_results[task] = {
                            "success": True,
                            "model_used": model_info["model_name"],
                            "parameters": model_info["parameters"],
                            "load_time": load_time,
                            "available_models": len(available_models),
                        }
                    else:
                        print(f" ❌ Failed to load any model for task: {task}")
                        task_results[task] = {
                            "success": False,
                            "error": "No models loaded",
                        }

                except Exception as e:
                    print(f" ❌ Task {task} failed: {e}")
                    task_results[task] = {
                        "success": False,
                        "error": str(e),
                    }

            system_status = loader.get_system_status()

            return {
                "task_results": task_results,
                "system_status": system_status,
                "overall_success": all(
                    result.get("success", False) for result in task_results.values()
                ),
            }

        except Exception as e:
            print(f"❌ Model loader test failed: {e}")
            return {"success": False, "error": str(e)}

    async def test_multimodal_handler_fallbacks(self) -> Dict[str, Any]:
        """Test multimodal handler with fallback support.

        Runs four fixed scenarios (three with the generated test image, one
        text-only) through the handler's ``evaluate_multimodal`` and records
        the outcome of each.

        Returns:
            On a completed run: ``scenario_results`` (one dict per
            scenario), ``successful_scenarios``, ``total_scenarios``,
            ``success_rate`` and ``overall_success`` (true when at least 75%
            of the scenarios succeeded).  If the handler cannot be imported
            or created: ``{"success": False, "error": <message>}``.
        """
        print("\n🤖 TESTING MULTIMODAL HANDLER FALLBACKS")
        print("=" * 60)

        try:
            from ai.multimodal.enhanced_multimodal_handler import create_enhanced_multimodal_handler
            from ai.multimodal.schemas import MultimodalEvaluationRequest

            handler = create_enhanced_multimodal_handler(device="cpu", enable_fallback=True)

            test_image = self.create_test_image()

            test_scenarios = [
                {
                    "name": "Image Captioning",
                    "text": "Describe this image",
                    "image": test_image,
                    "expected_task": "image_captioning",
                },
                {
                    "name": "Visual Question Answering",
                    "text": "What do you see in this image?",
                    "image": test_image,
                    "expected_task": "vqa",
                },
                {
                    "name": "Multimodal Chat",
                    "text": "What can you tell me about this image?",
                    "image": test_image,
                    "expected_task": "multimodal_chat",
                },
                {
                    "name": "Text Classification",
                    "text": "This is safe and educational content",
                    "image": None,
                    "expected_task": "text_classification",
                },
            ]

            scenario_results = []

            for scenario in test_scenarios:
                print(f"\n📝 Testing scenario: {scenario['name']}")

                # One failing scenario is recorded and the loop continues.
                try:
                    multimodal_input = handler.process_input(
                        text=scenario["text"],
                        image=scenario["image"],
                    )

                    request = MultimodalEvaluationRequest(
                        input=multimodal_input,
                        target_model="auto",
                        evaluation_type="test",
                    )

                    # Timing is taken from the handler's own
                    # ``processing_time_ms``; no local stopwatch is needed.
                    result = await handler.evaluate_multimodal(request)

                    if result.success:
                        print(f" ✅ Success: {result.success}")
                        print(f" 🤖 Model Used: {result.model_used}")
                        print(f" 🔄 Fallback Used: {result.fallback_used}")
                        print(f" ⏱️ Processing Time: {result.processing_time_ms:.1f}ms")
                        print(f" 🛡️ Safety Score: {result.safety_score:.3f}")

                        if result.evaluation and "model_response" in result.evaluation:
                            response = result.evaluation["model_response"]
                            print(f" 🤖 Response: '{response[:100]}...'")

                        scenario_results.append({
                            "scenario": scenario["name"],
                            "success": True,
                            "model_used": result.model_used,
                            "fallback_used": result.fallback_used,
                            "processing_time_ms": result.processing_time_ms,
                            "safety_score": result.safety_score,
                            "expected_task": scenario["expected_task"],
                        })
                    else:
                        print(" ❌ Evaluation failed")
                        scenario_results.append({
                            "scenario": scenario["name"],
                            "success": False,
                            "error": "Evaluation failed",
                        })

                except Exception as e:
                    print(f" ❌ Scenario failed: {e}")
                    scenario_results.append({
                        "scenario": scenario["name"],
                        "success": False,
                        "error": str(e),
                    })

            successful = sum(1 for r in scenario_results if r["success"])
            total = len(scenario_results)
            # Guarded so an emptied scenario list cannot divide by zero.
            success_rate = successful / total if total else 0.0

            return {
                "scenario_results": scenario_results,
                "successful_scenarios": successful,
                "total_scenarios": total,
                "success_rate": success_rate,
                "overall_success": success_rate >= 0.75,
            }

        except Exception as e:
            print(f"❌ Multimodal handler test failed: {e}")
            return {"success": False, "error": str(e)}

    async def test_fallback_robustness(self) -> Dict[str, Any]:
        """Report the handler's fallback status and run one request through it.

        Prints the handler's system status (per-task model availability) and
        then evaluates a single image-description request with
        ``target_model="auto"``.  No failure is injected here; the result
        only records whether the handler reports having used a fallback.

        Returns:
            On success: ``success=True`` plus ``final_model``,
            ``fallback_used``, ``models_tried`` (empty list if the handler
            did not report any) and ``processing_time_ms`` (wall time of the
            evaluation call, in milliseconds).  Otherwise
            ``{"success": False, "error": <message>}``.
        """
        print("\n🛡️ TESTING FALLBACK ROBUSTNESS")
        print("=" * 60)

        try:
            from ai.multimodal.enhanced_multimodal_handler import create_enhanced_multimodal_handler
            from ai.multimodal.schemas import MultimodalEvaluationRequest

            handler = create_enhanced_multimodal_handler(device="cpu", enable_fallback=True)

            status = handler.get_system_status()

            print("📊 System Status:")
            print(f" 📋 Supported Tasks: {len(status['supported_tasks'])}")
            print(f" 🤖 Loaded Models: {status['loaded_models']}")

            for task, task_status in status["task_status"].items():
                print(f"\n📋 Task: {task}")
                print(f" 📦 Total Models: {task_status['total_models']}")
                print(f" ✅ Loaded Models: {task_status['loaded_models']}")
                print(f" 🎯 Primary Loaded: {task_status['primary_loaded']}")

                for model in task_status["available_models"]:
                    status_icon = "✅" if model["loaded"] else "⏳"
                    print(f" {status_icon} {model['name']} (priority: {model['priority']})")

            print("\n🔄 TESTING FALLBACK CHAIN:")

            test_image = self.create_test_image()

            multimodal_input = handler.process_input(
                text="Describe this image",
                image=test_image,
            )

            request = MultimodalEvaluationRequest(
                input=multimodal_input,
                target_model="auto",
                evaluation_type="fallback_test",
            )

            start_time = time.perf_counter()
            result = await handler.evaluate_multimodal(request)
            # Convert to milliseconds: the value is printed with an "ms"
            # suffix and returned under ``processing_time_ms``.
            eval_time_ms = (time.perf_counter() - start_time) * 1000.0

            if result.success:
                # ``evaluation`` may be empty/None on a successful result;
                # normalise once so both the print and the return are safe.
                evaluation = result.evaluation or {}

                print(" ✅ Fallback chain successful")
                print(f" 🤖 Final Model: {result.model_used}")
                print(f" 🔄 Fallback Used: {result.fallback_used}")
                print(f" ⏱️ Time: {eval_time_ms:.1f}ms")

                if "models_tried" in evaluation:
                    print(f" 📋 Models Tried: {evaluation['models_tried']}")

                return {
                    "success": True,
                    "final_model": result.model_used,
                    "fallback_used": result.fallback_used,
                    "models_tried": evaluation.get("models_tried", []),
                    "processing_time_ms": eval_time_ms,
                }

            print(" ❌ Fallback chain failed")
            return {"success": False, "error": "Fallback chain failed"}

        except Exception as e:
            print(f"❌ Robustness test failed: {e}")
            return {"success": False, "error": str(e)}

    async def run_comprehensive_fallback_test(self) -> Dict[str, Any]:
        """Run comprehensive fallback system test.

        Prints a banner, runs the three component tests in order and
        aggregates them.  The per-test results are also kept on
        ``self.test_results``.

        Returns:
            A report dict with ``timestamp``, ``total_tests``,
            ``tests_passed``, ``overall_success_rate``, ``system_robust``
            (true when at least 75% of the tests passed), the raw
            ``test_results`` and a per-component boolean ``summary``.
        """
        print("🏭 ENHANCED FALLBACK SYSTEM TEST")
        print("=" * 70)
        print("🔄 TESTING MULTIPLE MODELS WITH FALLBACK SUPPORT")
        print("🛡️ PRODUCTION ROBUSTNESS VALIDATION")
        print()

        test_results = {}

        print("🧪 TEST 1: Model Loader Fallbacks")
        test_results["model_loader"] = await self.test_model_loader_fallbacks()

        print("\n🧪 TEST 2: Multimodal Handler Fallbacks")
        test_results["multimodal_handler"] = await self.test_multimodal_handler_fallbacks()

        print("\n🧪 TEST 3: Fallback Robustness")
        test_results["robustness"] = await self.test_fallback_robustness()

        self.test_results = test_results

        # A test reports its outcome under ``success`` (robustness test and
        # all hard failures) or ``overall_success`` (loader/handler tests).
        tests_passed = sum(
            1
            for result in test_results.values()
            if result.get("success", result.get("overall_success", False))
        )
        total_tests = len(test_results)
        overall_success_rate = tests_passed / total_tests if total_tests else 0.0

        final_report = {
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "total_tests": total_tests,
            "tests_passed": tests_passed,
            "overall_success_rate": overall_success_rate,
            "system_robust": overall_success_rate >= 0.75,
            "test_results": test_results,
            "summary": {
                "model_loader_fallbacks": test_results.get("model_loader", {}).get("overall_success", False),
                "multimodal_handler_fallbacks": test_results.get("multimodal_handler", {}).get("overall_success", False),
                "fallback_robustness": test_results.get("robustness", {}).get("success", False),
            },
        }

        return final_report

    def generate_fallback_report(self, report: Dict[str, Any]):
        """Generate comprehensive fallback system report.

        Prints the overall status, a pass/fail line per component and the
        detailed results of each test.  Per-task and per-scenario lines are
        printed whether or not the component passed overall, so individual
        failures stay visible.

        Args:
            report: Report dict as produced by
                ``run_comprehensive_fallback_test``.  ``timestamp``,
                ``total_tests``, ``tests_passed``, ``overall_success_rate``
                and ``system_robust`` are required; ``summary`` and
                ``test_results`` are optional.

        Returns:
            The same ``report`` object, unchanged.
        """
        print("\n📊 ENHANCED FALLBACK SYSTEM REPORT")
        print("=" * 70)

        print("\n🎯 OVERALL FALLBACK SYSTEM STATUS:")
        print(f" 📅 Timestamp: {report['timestamp']}")
        print(f" 🧪 Tests Run: {report['total_tests']}")
        print(f" ✅ Tests Passed: {report['tests_passed']}")
        print(f" 📈 Success Rate: {report['overall_success_rate']:.1%}")
        print(f" 🛡️ System Robust: {'✅ YES' if report['system_robust'] else '❌ NO'}")

        print("\n📋 COMPONENT STATUS:")
        summary = report.get("summary", {})

        components = {
            "model_loader_fallbacks": "🔧 Model Loader Fallbacks",
            "multimodal_handler_fallbacks": "🤖 Multimodal Handler Fallbacks",
            "fallback_robustness": "🛡️ Fallback Robustness",
        }

        for key, name in components.items():
            status = "✅ PASS" if summary.get(key, False) else "❌ FAIL"
            print(f" {status} {name}")

        if "test_results" in report:
            details = report["test_results"]
            print("\n🔍 DETAILED RESULTS:")

            if "model_loader" in details:
                loader_result = details["model_loader"]
                print("\n🔧 MODEL LOADER:")
                if loader_result.get("overall_success"):
                    print(" ✅ All tasks loaded successfully")
                else:
                    print(" ❌ Some tasks failed to load")
                # Listed in both cases: the failing tasks are exactly what a
                # reader needs when the component did not pass.
                for task, result in loader_result.get("task_results", {}).items():
                    if result.get("success"):
                        print(f" ✅ {task}: {result.get('model_used', 'Unknown')}")
                    else:
                        print(f" ❌ {task}: Failed")

            if "multimodal_handler" in details:
                handler_result = details["multimodal_handler"]
                print("\n🤖 MULTIMODAL HANDLER:")
                if handler_result.get("overall_success"):
                    print(f" ✅ Scenarios: {handler_result.get('successful_scenarios', 0)}/{handler_result.get('total_scenarios', 0)}")
                else:
                    print(" ❌ Some scenarios failed")
                for result in handler_result.get("scenario_results", []):
                    if result.get("success"):
                        # 🔄 marks a scenario that succeeded via a fallback model.
                        fallback_status = "🔄" if result.get("fallback_used") else "✅"
                        print(f" {fallback_status} {result.get('scenario', 'Unknown')}: {result.get('model_used', 'Unknown')}")
                    else:
                        print(f" ❌ {result.get('scenario', 'Unknown')}: Failed")

            if "robustness" in details:
                robust_result = details["robustness"]
                print("\n🛡️ FALLBACK ROBUSTNESS:")
                if robust_result.get("success"):
                    print(" ✅ Fallback chain working")
                    print(f" 🤖 Final Model: {robust_result.get('final_model', 'Unknown')}")
                    print(f" 🔄 Fallback Used: {robust_result.get('fallback_used', False)}")
                    models_tried = robust_result.get("models_tried", [])
                    if models_tried:
                        print(f" 📋 Models Tried: {models_tried}")
                else:
                    print(" ❌ Fallback chain failed")

        if report["system_robust"]:
            print("\n🏆 ENHANCED FALLBACK SYSTEM: PRODUCTION READY!")
            print(" ✅ Multiple models with fallback support")
            print(" ✅ Robust error handling")
            print(" ✅ Automatic model switching")
            print(" ✅ Production reliability confirmed")
        else:
            print("\n⚠️ ENHANCED FALLBACK SYSTEM: NEEDS IMPROVEMENT")
            print(" ❌ Some fallback mechanisms not working")
            print(" 🔧 System needs optimization")

        return report
|
|
async def main() -> int:
    """Run the full fallback test suite and print its report.

    The title banner is printed by
    ``EnhancedFallbackTester.run_comprehensive_fallback_test`` itself, so it
    is not printed again here.

    Returns:
        Process exit code: 0 when the report's ``system_robust`` flag is
        true, 1 otherwise.
    """
    tester = EnhancedFallbackTester()

    fallback_report = await tester.run_comprehensive_fallback_test()

    tester.generate_fallback_report(fallback_report)

    return 0 if fallback_report.get("system_robust", False) else 1
|
|
if __name__ == "__main__":
    # sys.exit is the supported way to set the process exit status; the bare
    # ``exit`` helper is installed by the ``site`` module for interactive use
    # and is absent when Python runs with ``-S``.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
|
|