Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test for Task 9.2: Performance Metrics Collection Implementation. | |
| This script validates that performance metrics collection has been successfully implemented: | |
| - Performance metrics are collected during prompt executions | |
| - Response times and confidence levels are logged | |
| - Component-specific performance tracking works | |
| - Integration with existing system is seamless | |
| Requirements validated: 8.1, 8.2 | |
| """ | |
| import sys | |
| import os | |
| import time | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) | |
| from core.simplified_medical_app import SimplifiedMedicalApp | |
| from src.config.prompt_management.performance_monitor import PromptMonitor | |
| def test_performance_metrics_collection(): | |
| """Test Task 9.2: Performance metrics collection during prompt execution.""" | |
| print("Testing Task 9.2: Performance metrics collection...") | |
| # Create app with performance monitoring | |
| app = SimplifiedMedicalApp() | |
| # Verify performance monitor is initialized | |
| assert hasattr(app, 'performance_monitor'), "Should have performance monitor" | |
| assert isinstance(app.performance_monitor, PromptMonitor), "Should be PromptMonitor instance" | |
| # Test direct performance monitoring (independent of AI providers) | |
| print(" Testing direct performance monitoring...") | |
| # Directly test the performance monitor | |
| monitor = app.performance_monitor | |
| # Log some test metrics | |
| for i in range(3): | |
| monitor.track_execution( | |
| agent_type='spiritual_monitor', | |
| response_time=0.5 + i * 0.1, | |
| confidence=0.7 + i * 0.05, | |
| success=True, | |
| metadata={'test_execution': i, 'message_length': 50 + i * 10} | |
| ) | |
| # Get performance metrics | |
| metrics = app.get_performance_metrics('spiritual_monitor') | |
| # Verify metrics collection (Requirement 8.1) | |
| assert 'total_executions' in metrics, "Should track total executions" | |
| assert 'average_response_time' in metrics, "Should track average response time" | |
| assert 'average_confidence' in metrics, "Should track average confidence" | |
| assert 'success_rate' in metrics, "Should track success rate" | |
| # Verify we have collected metrics for our test executions | |
| assert metrics['total_executions'] >= 3, \ | |
| f"Should have at least 3 executions, got {metrics['total_executions']}" | |
| # Verify response times are reasonable | |
| assert metrics['average_response_time'] > 0, "Should have positive response times" | |
| assert metrics['average_response_time'] < 30, "Response times should be reasonable (< 30s)" | |
| # Verify confidence levels are in valid range | |
| assert 0 <= metrics['average_confidence'] <= 1, "Confidence should be between 0 and 1" | |
| # Verify success rate | |
| assert 0 <= metrics['success_rate'] <= 1, "Success rate should be between 0 and 1" | |
| print(f" β Collected metrics for {metrics['total_executions']} executions") | |
| print(f" β Average response time: {metrics['average_response_time']:.3f}s") | |
| print(f" β Average confidence: {metrics['average_confidence']:.3f}") | |
| print(f" β Success rate: {metrics['success_rate']:.3f}") | |
| # Test integration with actual message processing (if AI is available) | |
| print(" Testing integration with message processing...") | |
| try: | |
| # Process one test message | |
| history, status = app.process_message("Test message for monitoring") | |
| print(" β Message processing integration working") | |
| except Exception as e: | |
| print(f" β Message processing failed (expected without AI): {e}") | |
| # This is expected without AI providers, but monitoring should still work | |
| return True | |
| def test_component_specific_tracking(): | |
| """Test component-specific performance tracking.""" | |
| print("Testing component-specific performance tracking...") | |
| monitor = PromptMonitor() | |
| # Test tracking for different agent types | |
| agent_types = ['spiritual_monitor', 'triage_question', 'triage_evaluator'] | |
| for agent_type in agent_types: | |
| # Log some test metrics | |
| for i in range(3): | |
| monitor.track_execution( | |
| agent_type=agent_type, | |
| response_time=0.5 + i * 0.1, | |
| confidence=0.7 + i * 0.1, | |
| success=True, | |
| metadata={'test_execution': i} | |
| ) | |
| # Verify each agent has separate metrics | |
| for agent_type in agent_types: | |
| metrics = monitor.get_detailed_metrics(agent_type) | |
| assert metrics['total_executions'] == 3, f"Should have 3 executions for {agent_type}" | |
| assert metrics['average_response_time'] > 0, f"Should have response time for {agent_type}" | |
| assert metrics['average_confidence'] > 0, f"Should have confidence for {agent_type}" | |
| print(f" β {agent_type}: {metrics['total_executions']} executions tracked") | |
| return True | |
| def test_performance_trend_analysis(): | |
| """Test performance trend analysis capabilities.""" | |
| print("Testing performance trend analysis...") | |
| monitor = PromptMonitor() | |
| # Simulate improving performance over time | |
| base_time = 1.0 | |
| for i in range(10): | |
| # Gradually improving response times | |
| response_time = base_time - (i * 0.05) # Getting faster | |
| confidence = 0.6 + (i * 0.03) # Getting more confident | |
| monitor.track_execution( | |
| agent_type='test_agent', | |
| response_time=response_time, | |
| confidence=confidence, | |
| success=True | |
| ) | |
| # Get detailed metrics with trend analysis | |
| metrics = monitor.get_detailed_metrics('test_agent') | |
| # Verify trend analysis is available | |
| assert 'performance_trend' in metrics, "Should include performance trend analysis" | |
| assert 'confidence_distribution' in metrics, "Should include confidence distribution" | |
| # Verify trend detection | |
| trend = metrics['performance_trend'] | |
| assert trend in ['improving', 'stable', 'degrading', 'insufficient_data'], \ | |
| f"Should have valid trend value, got: {trend}" | |
| print(f" β Performance trend detected: {trend}") | |
| print(f" β Confidence distribution: {metrics['confidence_distribution']}") | |
| return True | |
| def test_error_handling_and_logging(): | |
| """Test error handling and logging in performance monitoring.""" | |
| print("Testing error handling and logging...") | |
| monitor = PromptMonitor() | |
| # Log some successful and failed executions | |
| for i in range(5): | |
| success = i % 2 == 0 # Alternate success/failure | |
| monitor.track_execution( | |
| agent_type='error_test_agent', | |
| response_time=0.5, | |
| confidence=0.8 if success else 0.3, | |
| success=success, | |
| metadata={'error_test': True, 'execution_id': i} | |
| ) | |
| # Get metrics | |
| metrics = monitor.get_detailed_metrics('error_test_agent') | |
| # Verify error tracking | |
| assert metrics['total_executions'] == 5, "Should track all executions" | |
| assert 0 < metrics['success_rate'] < 1, "Should have mixed success rate" | |
| # Verify error patterns are analyzed | |
| assert 'error_patterns' in metrics, "Should analyze error patterns" | |
| print(f" β Success rate: {metrics['success_rate']:.2f}") | |
| print(f" β Error patterns analyzed: {len(metrics['error_patterns'])} patterns found") | |
| return True | |
| def test_integration_with_existing_system(): | |
| """Test integration with existing medical app system.""" | |
| print("Testing integration with existing system...") | |
| app = SimplifiedMedicalApp() | |
| # Test that performance monitoring doesn't interfere with normal operation | |
| message = "I need help with my medication" | |
| # Process message normally | |
| history, status = app.process_message(message) | |
| # Verify normal operation still works | |
| assert isinstance(history, list), "Should return history list" | |
| assert isinstance(status, str), "Should return status string" | |
| assert len(history) > 0, "Should have message in history" | |
| # Verify performance metrics were collected | |
| all_metrics = app.get_performance_metrics() | |
| assert isinstance(all_metrics, dict), "Should return metrics dictionary" | |
| # Test optimization recommendations | |
| recommendations = app.get_optimization_recommendations() | |
| assert isinstance(recommendations, dict), "Should return recommendations dictionary" | |
| # Test improvement tracking | |
| tracking = app.get_improvement_tracking() | |
| assert isinstance(tracking, dict), "Should return tracking dictionary" | |
| print(" β Normal operation preserved") | |
| print(" β Performance metrics accessible") | |
| print(" β Optimization features available") | |
| return True | |
| def main(): | |
| """Run all Task 9.2 completion tests.""" | |
| print("=" * 70) | |
| print("TASK 9.2 COMPLETION VALIDATION: PERFORMANCE METRICS COLLECTION") | |
| print("=" * 70) | |
| try: | |
| # Test all components | |
| if not test_performance_metrics_collection(): | |
| return False | |
| if not test_component_specific_tracking(): | |
| return False | |
| if not test_performance_trend_analysis(): | |
| return False | |
| if not test_error_handling_and_logging(): | |
| return False | |
| if not test_integration_with_existing_system(): | |
| return False | |
| print("\n" + "=" * 70) | |
| print("β TASK 9.2 COMPLETED SUCCESSFULLY!") | |
| print("=" * 70) | |
| print("IMPLEMENTED FEATURES:") | |
| print("β Performance metrics collection during prompt executions") | |
| print("β Response time and confidence level logging") | |
| print("β Component-specific performance tracking") | |
| print("β Performance trend analysis capabilities") | |
| print("β Error handling and pattern detection") | |
| print("β Integration with existing medical assistant system") | |
| print("β Seamless operation without affecting core functionality") | |
| print("\nREQUIREMENTS VALIDATED:") | |
| print("β 8.1: Response time and confidence level logging implemented") | |
| print("β 8.2: Component-specific performance tracking working") | |
| print("=" * 70) | |
| return True | |
| except Exception as e: | |
| print(f"\nβ TASK 9.2 VALIDATION FAILED: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return False | |
| if __name__ == "__main__": | |
| success = main() | |
| sys.exit(0 if success else 1) |