#!/usr/bin/env python3
"""
Integration tests for UI Classification Improvements.

Tests the complete workflow from chat to verification with all enhanced components
working together correctly. This validates task 10 requirements.

Requirements: 10.1 - Complete workflow testing
"""

import pytest
import tempfile
import os
import json
from datetime import datetime
from unittest.mock import Mock, patch

from src.interface.enhanced_results_display_manager import EnhancedResultsDisplayManager
from src.core.provider_summary_generator import ProviderSummary, ProviderSummaryGenerator
from src.core.improved_classification_prompt_manager import ImprovedClassificationPromptManager
from src.config.enhanced_display_config import EnhancedDisplayConfig, get_enhanced_display_config
from src.core.conversation_logger import ConversationLogger
from src.core.spiritual_state import SpiritualState, SpiritualAssessment
from src.core.conversation_verification import EnhancedConversationVerificationManager
from src.core.ui_error_handler import UIErrorHandler
class TestUIClassificationImprovementsIntegration:
    """
    Integration tests for UI Classification Improvements.

    Tests the complete workflow: Chat -> Classification -> Display -> Verification
    """

    def setup_method(self):
        """Set up a fresh component stack and temp workspace for each test."""
        # Each test gets its own scratch directory; removed in teardown_method.
        self.temp_dir = tempfile.mkdtemp()

        # Initialize all components under test.
        self.display_manager = EnhancedResultsDisplayManager()
        self.summary_generator = ProviderSummaryGenerator()
        self.classification_manager = ImprovedClassificationPromptManager()
        self.verification_manager = EnhancedConversationVerificationManager(self.temp_dir)
        self.error_handler = UIErrorHandler()

        # Shared test fixtures.
        self.test_patient_name = "Integration Test Patient"
        self.test_patient_phone = "555-0199"
| def teardown_method(self): | |
| """Clean up test environment.""" | |
| import shutil | |
| shutil.rmtree(self.temp_dir, ignore_errors=True) | |
| def create_test_conversation_logger(self) -> ConversationLogger: | |
| """Create a conversation logger with test data for integration testing.""" | |
| logger = ConversationLogger(patient_name=self.test_patient_name) | |
| # Simulate a conversation that should trigger RED classification | |
| test_exchanges = [ | |
| { | |
| "user_message": "I've been feeling really lost lately", | |
| "assistant_response": "I understand this can be a difficult time. Can you tell me more about what you're experiencing?", | |
| "classification": SpiritualState.YELLOW, | |
| "confidence": 0.65, | |
| "indicators": ["feeling lost", "emotional distress"], | |
| "reasoning": "Patient expressing feelings of being lost, requires further assessment" | |
| }, | |
| { | |
| "user_message": "I just don't see the point in anything anymore. My life feels meaningless since my diagnosis.", | |
| "assistant_response": "I hear that you're struggling with finding meaning right now. These feelings are understandable given what you're going through.", | |
| "classification": SpiritualState.RED, | |
| "confidence": 0.85, | |
| "indicators": ["loss of meaning", "existential distress", "questioning purpose"], | |
| "reasoning": "Patient expressing loss of meaning and purpose, which is an explicit red flag indicator" | |
| }, | |
| { | |
| "user_message": "Sometimes I wonder if there's any point to all this suffering", | |
| "assistant_response": "Your feelings about suffering are very important. Many people in your situation have similar questions.", | |
| "classification": SpiritualState.RED, | |
| "confidence": 0.90, | |
| "indicators": ["doubt about meaning of suffering", "existential questioning"], | |
| "reasoning": "Patient questioning meaning of suffering - explicit red flag indicator requiring immediate attention" | |
| } | |
| ] | |
| for exchange in test_exchanges: | |
| assessment = SpiritualAssessment( | |
| state=exchange["classification"], | |
| confidence=exchange["confidence"], | |
| indicators=exchange["indicators"], | |
| reasoning=exchange["reasoning"] | |
| ) | |
| logger.log_exchange( | |
| exchange["user_message"], | |
| exchange["assistant_response"], | |
| assessment | |
| ) | |
| return logger | |
| def test_complete_workflow_integration(self): | |
| """ | |
| Test the complete workflow from chat to verification. | |
| This is the main integration test that validates all components | |
| work together correctly. | |
| """ | |
| print("π§ͺ Testing complete UI Classification Improvements workflow...") | |
| # Step 1: Create conversation with enhanced classification | |
| print(" 1. Creating conversation with enhanced classification...") | |
| logger = self.create_test_conversation_logger() | |
| # Verify conversation was logged correctly | |
| assert len(logger.entries) == 3 | |
| assert logger.patient_name == self.test_patient_name | |
| # Step 2: Generate provider summary with enhanced features | |
| print(" 2. Generating enhanced provider summary...") | |
| # Get the RED flag assessment from the conversation | |
| red_assessment = None | |
| for entry in logger.entries: | |
| if entry.spiritual_classification == "RED": | |
| # Create SpiritualAssessment from entry data | |
| from src.core.spiritual_state import SpiritualState | |
| red_assessment = SpiritualAssessment( | |
| state=SpiritualState.RED, | |
| confidence=entry.classification_confidence, | |
| indicators=entry.classification_indicators, | |
| reasoning=entry.classification_reasoning | |
| ) | |
| break | |
| assert red_assessment is not None, "Should have RED flag assessment" | |
| # Generate enhanced provider summary | |
| summary = self.summary_generator.generate_summary( | |
| indicators=red_assessment.indicators, | |
| reasoning=red_assessment.reasoning, | |
| confidence=red_assessment.confidence, | |
| patient_name=self.test_patient_name, | |
| patient_phone=self.test_patient_phone, | |
| conversation_context="Patient expressing loss of meaning and questioning suffering", | |
| medical_context={ | |
| "age": 45, | |
| "gender": "individual", | |
| "conditions": ["chronic illness", "recent diagnosis"] | |
| } | |
| ) | |
| # Verify summary was generated correctly | |
| assert isinstance(summary, ProviderSummary) | |
| assert summary.patient_name == self.test_patient_name | |
| assert summary.patient_phone == self.test_patient_phone | |
| assert summary.classification == "RED" | |
| assert len(summary.indicators) > 0 | |
| # Step 3: Format with enhanced display manager | |
| print(" 3. Formatting with enhanced display manager...") | |
| # Test AI analysis section formatting | |
| ai_analysis_html = self.display_manager.format_ai_analysis_section( | |
| classification="RED", | |
| indicators=red_assessment.indicators, | |
| reasoning=red_assessment.reasoning, | |
| confidence=red_assessment.confidence | |
| ) | |
| assert isinstance(ai_analysis_html, str) | |
| assert len(ai_analysis_html) > 0 | |
| assert "AI Analysis" in ai_analysis_html | |
| assert "RED FLAG" in ai_analysis_html | |
| # Test patient message section formatting | |
| patient_message_html = self.display_manager.format_patient_message_section( | |
| "Sometimes I wonder if there's any point to all this suffering" | |
| ) | |
| assert isinstance(patient_message_html, str) | |
| assert "Patient Message" in patient_message_html | |
| assert "suffering" in patient_message_html | |
| # Test provider summary section formatting | |
| provider_summary_html = self.display_manager.format_provider_summary_section(summary) | |
| assert isinstance(provider_summary_html, str) | |
| assert "Provider Summary" in provider_summary_html | |
| assert self.test_patient_name in provider_summary_html | |
| assert self.test_patient_phone in provider_summary_html | |
| # Step 4: Test coherent paragraph formatting | |
| print(" 4. Testing coherent paragraph formatting...") | |
| coherent_paragraph = self.summary_generator.format_coherent_paragraph(summary) | |
| assert isinstance(coherent_paragraph, str) | |
| assert len(coherent_paragraph) > 50 # Should be substantial | |
| assert self.test_patient_name in coherent_paragraph | |
| assert "45-year-old" in coherent_paragraph or "individual" in coherent_paragraph | |
| assert "RED FLAG" in coherent_paragraph | |
| # Step 5: Test combined results formatting | |
| print(" 5. Testing combined results formatting...") | |
| combined_html = self.display_manager.format_combined_results( | |
| ai_analysis={ | |
| 'classification': 'RED', | |
| 'indicators': red_assessment.indicators, | |
| 'reasoning': red_assessment.reasoning, | |
| 'confidence': red_assessment.confidence | |
| }, | |
| patient_message="Sometimes I wonder if there's any point to all this suffering", | |
| provider_summary=summary | |
| ) | |
| assert isinstance(combined_html, str) | |
| assert "AI Analysis" in combined_html | |
| assert "Patient Message" in combined_html | |
| assert "Provider Summary" in combined_html | |
| # Step 6: Test verification system integration | |
| print(" 6. Testing verification system integration...") | |
| verification_session = self.verification_manager.create_verification_session( | |
| logger, | |
| verifier_name="Integration Test", | |
| enable_enhanced_formats=True | |
| ) | |
| assert verification_session is not None | |
| assert verification_session.enhanced_format_enabled is True | |
| assert len(verification_session.verification_records) == 3 | |
| # Verify enhanced formats are applied | |
| for record in verification_session.verification_records: | |
| assert record.enhanced_display_format is not None | |
| assert record.visual_sections is not None | |
| # Step 7: Test CSV export with enhanced data | |
| print(" 7. Testing CSV export with enhanced data...") | |
| from src.core.verification_exporter import EnhancedVerificationExporter | |
| exporter = EnhancedVerificationExporter(self.temp_dir) | |
| csv_path = exporter.export_session_to_csv( | |
| verification_session, | |
| include_enhanced_data=True | |
| ) | |
| assert os.path.exists(csv_path) | |
| with open(csv_path, 'r', encoding='utf-8') as f: | |
| csv_content = f.read() | |
| # Verify enhanced data is in CSV | |
| assert 'has_enhanced_display' in csv_content | |
| assert 'enhanced_indicators_count' in csv_content | |
| assert self.test_patient_name in csv_content | |
| print(" β Complete workflow integration test passed!") | |
| def test_classification_consistency_validation(self): | |
| """Test that classification consistency is maintained throughout workflow.""" | |
| print("π§ͺ Testing classification consistency validation...") | |
| # Test explicit red indicators | |
| explicit_red_indicators = self.classification_manager.get_explicit_red_indicators() | |
| assert "Complex grief" in explicit_red_indicators | |
| assert "Loss of a loved one" in explicit_red_indicators | |
| assert "Doubt about meaning of life" in explicit_red_indicators | |
| assert "Doubt about meaning of suffering" in explicit_red_indicators | |
| assert "Doubt about personal dignity" in explicit_red_indicators | |
| # Test classification validation | |
| test_result = self.classification_manager.create_classification_result( | |
| classification="red", | |
| confidence=0.85, | |
| indicators=["doubt about meaning of suffering"], | |
| reasoning="Patient questioning meaning of suffering", | |
| red_flag_indicators=["doubt about meaning of suffering"] | |
| ) | |
| assert test_result.is_valid is True | |
| assert test_result.classification == "red" | |
| # Test invalid classification gets corrected | |
| invalid_result = self.classification_manager.create_classification_result( | |
| classification="invalid", | |
| confidence=2.0, | |
| indicators=[], | |
| reasoning="" | |
| ) | |
| assert invalid_result.classification in ["red", "yellow", "green"] | |
| assert 0.0 <= invalid_result.confidence <= 1.0 | |
| assert len(invalid_result.indicators) > 0 | |
| print(" β Classification consistency validation passed!") | |
| def test_error_handling_throughout_workflow(self): | |
| """Test error handling and recovery throughout the complete workflow.""" | |
| print("π§ͺ Testing error handling throughout workflow...") | |
| # Test with problematic data | |
| problematic_summary = ProviderSummary( | |
| patient_name="[Patient Name]", # Placeholder | |
| patient_phone="[Phone Number]", # Placeholder | |
| classification="RED", | |
| confidence=1.5, # Invalid confidence | |
| reasoning="", # Empty reasoning | |
| indicators=[], # No indicators | |
| severity_level="INVALID", | |
| urgency_level="INVALID" | |
| ) | |
| # Display manager should handle this gracefully | |
| display_result = self.display_manager.format_provider_summary_section(problematic_summary) | |
| assert isinstance(display_result, str) | |
| assert len(display_result) > 0 | |
| # Should contain validation warnings or fallback content | |
| assert "Provider Summary" in display_result or "validation" in display_result.lower() | |
| # Test error statistics collection | |
| validation_result = self.error_handler.validate_provider_summary_structure(problematic_summary) | |
| stats = self.error_handler.get_error_statistics(validation_result.errors) | |
| assert stats["total"] > 0 | |
| assert len(stats["by_category"]) > 0 | |
| print(" β Error handling throughout workflow passed!") | |
| def test_data_integrity_across_operations(self): | |
| """Test that data integrity is maintained across all operations.""" | |
| print("π§ͺ Testing data integrity across operations...") | |
| # Create test data | |
| original_indicators = ["loss of meaning", "spiritual distress", "questioning faith"] | |
| original_reasoning = "Patient expressing significant spiritual concerns" | |
| original_confidence = 0.85 | |
| # Generate summary | |
| summary = self.summary_generator.generate_summary( | |
| indicators=original_indicators, | |
| reasoning=original_reasoning, | |
| confidence=original_confidence, | |
| patient_name=self.test_patient_name, | |
| patient_phone=self.test_patient_phone | |
| ) | |
| # Verify data integrity in summary | |
| assert summary.patient_name == self.test_patient_name | |
| assert summary.patient_phone == self.test_patient_phone | |
| assert summary.confidence == original_confidence | |
| assert all(indicator in summary.indicators for indicator in original_indicators) | |
| # Format for display | |
| display_html = self.display_manager.format_provider_summary_section(summary) | |
| # Verify data integrity in display | |
| assert self.test_patient_name in display_html | |
| assert self.test_patient_phone in display_html | |
| # Format coherent paragraph | |
| coherent_paragraph = self.summary_generator.format_coherent_paragraph(summary) | |
| # Verify data integrity in coherent paragraph | |
| assert self.test_patient_name in coherent_paragraph | |
| assert self.test_patient_phone in coherent_paragraph | |
| # Export for verification | |
| export_data = summary.to_dict() | |
| # Verify data integrity in export | |
| assert export_data["patient_name"] == self.test_patient_name | |
| assert export_data["patient_phone"] == self.test_patient_phone | |
| assert export_data["confidence"] == original_confidence | |
| print(" β Data integrity across operations passed!") | |
| def test_performance_with_multiple_records(self): | |
| """Test performance and stability with multiple conversation records.""" | |
| print("π§ͺ Testing performance with multiple records...") | |
| # Create logger with multiple exchanges | |
| logger = ConversationLogger(patient_name="Performance Test Patient") | |
| # Add 10 exchanges to test performance | |
| for i in range(10): | |
| assessment = SpiritualAssessment( | |
| state=SpiritualState.RED if i % 3 == 0 else SpiritualState.YELLOW, | |
| confidence=0.7 + (i * 0.02), | |
| indicators=[f"indicator_{i}", f"concern_{i}"], | |
| reasoning=f"Test reasoning for exchange {i}" | |
| ) | |
| logger.log_exchange( | |
| f"User message {i}: I'm having concerns about my situation", | |
| f"Assistant response {i}: I understand your concerns", | |
| assessment | |
| ) | |
| # Test verification session creation with multiple records | |
| verification_session = self.verification_manager.create_verification_session( | |
| logger, | |
| enable_enhanced_formats=True | |
| ) | |
| assert len(verification_session.verification_records) == 10 | |
| # Test that all records have enhanced formats | |
| enhanced_count = sum( | |
| 1 for record in verification_session.verification_records | |
| if record.enhanced_display_format is not None | |
| ) | |
| assert enhanced_count == 10 | |
| # Test CSV export performance | |
| from src.core.verification_exporter import EnhancedVerificationExporter | |
| exporter = EnhancedVerificationExporter(self.temp_dir) | |
| csv_path = exporter.export_session_to_csv( | |
| verification_session, | |
| include_enhanced_data=True | |
| ) | |
| assert os.path.exists(csv_path) | |
| # Verify CSV contains all records | |
| with open(csv_path, 'r', encoding='utf-8') as f: | |
| csv_content = f.read() | |
| # Should have header + metadata + 10 data rows | |
| lines = [line for line in csv_content.split('\n') if line.strip()] | |
| data_lines = [line for line in lines if not line.startswith('#')] | |
| # At least header + 10 records | |
| assert len(data_lines) >= 11 | |
| print(" β Performance with multiple records passed!") | |
| def test_configuration_management_integration(self): | |
| """Test that configuration management works correctly across components.""" | |
| print("π§ͺ Testing configuration management integration...") | |
| # Test default configuration | |
| default_config = get_enhanced_display_config() | |
| assert default_config.enabled is True | |
| # Note: use_icons default may vary based on configuration file | |
| assert hasattr(default_config, 'use_icons') | |
| # Test custom configuration | |
| custom_config = EnhancedDisplayConfig( | |
| enabled=True, | |
| use_icons=False, | |
| use_visual_separators=False | |
| ) | |
| custom_display_manager = EnhancedResultsDisplayManager(config=custom_config) | |
| # Test that custom config is applied | |
| assert custom_display_manager.config.use_icons is False | |
| assert custom_display_manager.config.use_visual_separators is False | |
| # Test formatting with custom config | |
| result = custom_display_manager.format_ai_analysis_section( | |
| classification="RED", | |
| indicators=["test indicator"], | |
| reasoning="test reasoning" | |
| ) | |
| assert isinstance(result, str) | |
| assert len(result) > 0 | |
| # Test disabled mode | |
| disabled_config = EnhancedDisplayConfig(enabled=False) | |
| disabled_display_manager = EnhancedResultsDisplayManager(config=disabled_config) | |
| result = disabled_display_manager.format_ai_analysis_section( | |
| classification="RED", | |
| indicators=["test indicator"], | |
| reasoning="test reasoning" | |
| ) | |
| # Should fall back to basic formatting | |
| assert isinstance(result, str) | |
| assert "AI Analysis" in result | |
| print(" β Configuration management integration passed!") | |
def run_integration_checkpoint():
    """
    Run the integration checkpoint tests.

    This function runs all integration tests and provides a summary
    of the results for task 10 validation.

    Returns:
        bool: True when every integration test passed, False otherwise.
    """
    print("🚀 Running UI Classification Improvements Integration Checkpoint")
    print("=" * 70)

    # Create test instance
    test_instance = TestUIClassificationImprovementsIntegration()

    tests = [
        ("Complete Workflow Integration", test_instance.test_complete_workflow_integration),
        ("Classification Consistency", test_instance.test_classification_consistency_validation),
        ("Error Handling Throughout Workflow", test_instance.test_error_handling_throughout_workflow),
        ("Data Integrity Across Operations", test_instance.test_data_integrity_across_operations),
        ("Performance with Multiple Records", test_instance.test_performance_with_multiple_records),
        ("Configuration Management", test_instance.test_configuration_management_integration)
    ]

    passed = 0
    failed = 0

    for test_name, test_func in tests:
        try:
            print(f"\n🧪 Running: {test_name}")
            test_instance.setup_method()
            try:
                test_func()
            finally:
                # Clean up the temp workspace even when the test raises,
                # so a failing test does not leak directories.
                test_instance.teardown_method()
            print(f" ✅ {test_name} PASSED")
            passed += 1
        except Exception as e:
            print(f" ❌ {test_name} FAILED: {e}")
            failed += 1
            import traceback
            traceback.print_exc()

    total = passed + failed
    # Guard against division by zero if the test list is ever emptied.
    success_rate = (passed / total) * 100 if total else 0.0

    print("\n" + "=" * 70)
    print("📊 INTEGRATION CHECKPOINT RESULTS")
    print("=" * 70)
    print(f"✅ Tests Passed: {passed}")
    print(f"❌ Tests Failed: {failed}")
    print(f"📈 Success Rate: {success_rate:.1f}%")

    if failed == 0:
        print("\n🎉 ALL INTEGRATION TESTS PASSED!")
        print("✅ All components work together correctly")
        print("✅ Full workflow from chat to verification validated")
        print("✅ Data integrity maintained across all operations")
        print("✅ Error handling and recovery working properly")
        print("✅ System ready for production use")
        return True
    else:
        print(f"\n⚠️ {failed} integration tests failed")
        print("❌ System requires fixes before production use")
        return False
if __name__ == "__main__":
    success = run_integration_checkpoint()
    # Raise SystemExit instead of calling the site-injected exit() helper,
    # which is not guaranteed to exist (e.g. under `python -S` or frozen apps).
    raise SystemExit(0 if success else 1)