Spaces:

DocUA
/

Spiritual_Health_Project

Sleeping

File size: 23,778 Bytes

be1b5d2

#!/usr/bin/env python3
"""
Integration tests for UI Classification Improvements.

Tests the complete workflow from chat to verification with all enhanced components
working together correctly. This validates task 10 requirements.

Requirements: 10.1 - Complete workflow testing
"""

import pytest
import tempfile
import os
import json
from datetime import datetime
from unittest.mock import Mock, patch

from src.interface.enhanced_results_display_manager import EnhancedResultsDisplayManager
from src.core.provider_summary_generator import ProviderSummary, ProviderSummaryGenerator
from src.core.improved_classification_prompt_manager import ImprovedClassificationPromptManager
from src.config.enhanced_display_config import EnhancedDisplayConfig, get_enhanced_display_config
from src.core.conversation_logger import ConversationLogger
from src.core.spiritual_state import SpiritualState, SpiritualAssessment
from src.core.conversation_verification import EnhancedConversationVerificationManager
from src.core.ui_error_handler import UIErrorHandler


class TestUIClassificationImprovementsIntegration:
    """
    Integration tests for UI Classification Improvements.
    
    Tests the complete workflow: Chat → Classification → Display → Verification
    """
    
    def setup_method(self):
        """Prepare a fresh scratch directory, the components under test, and patient fixtures."""
        # Patient identity shared by every test in this class
        self.test_patient_name = "Integration Test Patient"
        self.test_patient_phone = "555-0199"

        # Per-test scratch directory; it is also handed to the verification manager below
        scratch_dir = tempfile.mkdtemp()
        self.temp_dir = scratch_dir

        # Components under test, constructed in the same order as before
        self.display_manager = EnhancedResultsDisplayManager()
        self.summary_generator = ProviderSummaryGenerator()
        self.classification_manager = ImprovedClassificationPromptManager()
        self.verification_manager = EnhancedConversationVerificationManager(scratch_dir)
        self.error_handler = UIErrorHandler()
        self.test_patient_phone = "555-0199"
        
    def teardown_method(self):
        """Delete the scratch directory referenced by ``self.temp_dir``."""
        from shutil import rmtree

        # ignore_errors=True keeps cleanup best-effort: a missing or partially
        # removed directory does not raise.
        rmtree(self.temp_dir, ignore_errors=True)
    
    def create_test_conversation_logger(self) -> ConversationLogger:
        """
        Build a ConversationLogger pre-populated with three scripted exchanges.

        The script contains one YELLOW exchange followed by two RED exchanges,
        each logged together with a SpiritualAssessment built from its data.
        """
        scripted_turns = [
            {
                "user_message": "I've been feeling really lost lately",
                "assistant_response": "I understand this can be a difficult time. Can you tell me more about what you're experiencing?",
                "classification": SpiritualState.YELLOW,
                "confidence": 0.65,
                "indicators": ["feeling lost", "emotional distress"],
                "reasoning": "Patient expressing feelings of being lost, requires further assessment"
            },
            {
                "user_message": "I just don't see the point in anything anymore. My life feels meaningless since my diagnosis.",
                "assistant_response": "I hear that you're struggling with finding meaning right now. These feelings are understandable given what you're going through.",
                "classification": SpiritualState.RED,
                "confidence": 0.85,
                "indicators": ["loss of meaning", "existential distress", "questioning purpose"],
                "reasoning": "Patient expressing loss of meaning and purpose, which is an explicit red flag indicator"
            },
            {
                "user_message": "Sometimes I wonder if there's any point to all this suffering",
                "assistant_response": "Your feelings about suffering are very important. Many people in your situation have similar questions.",
                "classification": SpiritualState.RED,
                "confidence": 0.90,
                "indicators": ["doubt about meaning of suffering", "existential questioning"],
                "reasoning": "Patient questioning meaning of suffering - explicit red flag indicator requiring immediate attention"
            }
        ]

        conversation_log = ConversationLogger(patient_name=self.test_patient_name)

        # Replay the script in order so the logger ends up with three entries
        for turn in scripted_turns:
            conversation_log.log_exchange(
                turn["user_message"],
                turn["assistant_response"],
                SpiritualAssessment(
                    state=turn["classification"],
                    confidence=turn["confidence"],
                    indicators=turn["indicators"],
                    reasoning=turn["reasoning"]
                )
            )

        return conversation_log
    
    def test_complete_workflow_integration(self):
        """
        Test the complete workflow from chat to verification.

        Runs seven steps in order: log a scripted conversation, rebuild the
        first RED assessment from the logged entries, generate a provider
        summary, render each display section, render the combined view, create
        an enhanced verification session, and export that session to CSV.
        """
        print("🧪 Testing complete UI Classification Improvements workflow...")

        # Single source for the patient message rendered in steps 3 and 5
        patient_message = "Sometimes I wonder if there's any point to all this suffering"

        # Step 1: Create conversation with enhanced classification
        print("  1. Creating conversation with enhanced classification...")
        logger = self.create_test_conversation_logger()

        # Verify conversation was logged correctly
        assert len(logger.entries) == 3
        assert logger.patient_name == self.test_patient_name

        # Step 2: Generate provider summary with enhanced features
        print("  2. Generating enhanced provider summary...")

        # Pick the first logged entry classified RED.
        # NOTE(review): this assumes the logger stores the classification as the
        # upper-case string "RED" — confirm against ConversationLogger.
        first_red_entry = next(
            (entry for entry in logger.entries if entry.spiritual_classification == "RED"),
            None,
        )
        assert first_red_entry is not None, "Should have RED flag assessment"

        # Rebuild a SpiritualAssessment from the logged entry data
        red_assessment = SpiritualAssessment(
            state=SpiritualState.RED,
            confidence=first_red_entry.classification_confidence,
            indicators=first_red_entry.classification_indicators,
            reasoning=first_red_entry.classification_reasoning
        )

        # Generate enhanced provider summary
        summary = self.summary_generator.generate_summary(
            indicators=red_assessment.indicators,
            reasoning=red_assessment.reasoning,
            confidence=red_assessment.confidence,
            patient_name=self.test_patient_name,
            patient_phone=self.test_patient_phone,
            conversation_context="Patient expressing loss of meaning and questioning suffering",
            medical_context={
                "age": 45,
                "gender": "individual",
                "conditions": ["chronic illness", "recent diagnosis"]
            }
        )

        # Verify summary was generated correctly
        assert isinstance(summary, ProviderSummary)
        assert summary.patient_name == self.test_patient_name
        assert summary.patient_phone == self.test_patient_phone
        assert summary.classification == "RED"
        assert len(summary.indicators) > 0

        # Step 3: Format with enhanced display manager
        print("  3. Formatting with enhanced display manager...")

        # Test AI analysis section formatting
        ai_analysis_html = self.display_manager.format_ai_analysis_section(
            classification="RED",
            indicators=red_assessment.indicators,
            reasoning=red_assessment.reasoning,
            confidence=red_assessment.confidence
        )

        assert isinstance(ai_analysis_html, str)
        assert len(ai_analysis_html) > 0
        assert "AI Analysis" in ai_analysis_html
        assert "RED FLAG" in ai_analysis_html

        # Test patient message section formatting
        patient_message_html = self.display_manager.format_patient_message_section(
            patient_message
        )

        assert isinstance(patient_message_html, str)
        assert "Patient Message" in patient_message_html
        assert "suffering" in patient_message_html

        # Test provider summary section formatting
        provider_summary_html = self.display_manager.format_provider_summary_section(summary)

        assert isinstance(provider_summary_html, str)
        assert "Provider Summary" in provider_summary_html
        assert self.test_patient_name in provider_summary_html
        assert self.test_patient_phone in provider_summary_html

        # Step 4: Test coherent paragraph formatting
        print("  4. Testing coherent paragraph formatting...")

        coherent_paragraph = self.summary_generator.format_coherent_paragraph(summary)

        assert isinstance(coherent_paragraph, str)
        assert len(coherent_paragraph) > 50  # Should be substantial
        assert self.test_patient_name in coherent_paragraph
        # Either the age phrase or the gender word from medical_context is accepted
        assert "45-year-old" in coherent_paragraph or "individual" in coherent_paragraph
        assert "RED FLAG" in coherent_paragraph

        # Step 5: Test combined results formatting
        print("  5. Testing combined results formatting...")

        combined_html = self.display_manager.format_combined_results(
            ai_analysis={
                'classification': 'RED',
                'indicators': red_assessment.indicators,
                'reasoning': red_assessment.reasoning,
                'confidence': red_assessment.confidence
            },
            patient_message=patient_message,
            provider_summary=summary
        )

        assert isinstance(combined_html, str)
        assert "AI Analysis" in combined_html
        assert "Patient Message" in combined_html
        assert "Provider Summary" in combined_html

        # Step 6: Test verification system integration
        print("  6. Testing verification system integration...")

        verification_session = self.verification_manager.create_verification_session(
            logger,
            verifier_name="Integration Test",
            enable_enhanced_formats=True
        )

        assert verification_session is not None
        assert verification_session.enhanced_format_enabled is True
        assert len(verification_session.verification_records) == 3

        # Verify enhanced formats are applied to every record
        for record in verification_session.verification_records:
            assert record.enhanced_display_format is not None
            assert record.visual_sections is not None

        # Step 7: Test CSV export with enhanced data
        print("  7. Testing CSV export with enhanced data...")

        # Imported here because the module does not import the exporter at top level
        from src.core.verification_exporter import EnhancedVerificationExporter
        exporter = EnhancedVerificationExporter(self.temp_dir)

        csv_path = exporter.export_session_to_csv(
            verification_session,
            include_enhanced_data=True
        )

        assert os.path.exists(csv_path)

        with open(csv_path, 'r', encoding='utf-8') as f:
            csv_content = f.read()

        # Verify enhanced data is in CSV
        assert 'has_enhanced_display' in csv_content
        assert 'enhanced_indicators_count' in csv_content
        assert self.test_patient_name in csv_content

        print("  ✅ Complete workflow integration test passed!")
    
    def test_classification_consistency_validation(self):
        """Check the explicit RED indicator list and classification-result validation."""
        print("🧪 Testing classification consistency validation...")

        # Each of these phrases must appear in the explicit RED indicator list
        red_indicator_catalog = self.classification_manager.get_explicit_red_indicators()
        for required_indicator in (
            "Complex grief",
            "Loss of a loved one",
            "Doubt about meaning of life",
            "Doubt about meaning of suffering",
            "Doubt about personal dignity",
        ):
            assert required_indicator in red_indicator_catalog

        # A well-formed RED result must be reported as valid and keep its label
        well_formed = self.classification_manager.create_classification_result(
            classification="red",
            confidence=0.85,
            indicators=["doubt about meaning of suffering"],
            reasoning="Patient questioning meaning of suffering",
            red_flag_indicators=["doubt about meaning of suffering"]
        )

        assert well_formed.is_valid is True
        assert well_formed.classification == "red"

        # Malformed input: unknown label, out-of-range confidence, no indicators,
        # empty reasoning. The result must come back within the asserted bounds.
        malformed = self.classification_manager.create_classification_result(
            classification="invalid",
            confidence=2.0,
            indicators=[],
            reasoning=""
        )

        assert malformed.classification in ("red", "yellow", "green")
        assert 0.0 <= malformed.confidence <= 1.0
        assert len(malformed.indicators) > 0

        print("  ✅ Classification consistency validation passed!")
    
    def test_error_handling_throughout_workflow(self):
        """Push a deliberately malformed ProviderSummary through display and validation."""
        print("🧪 Testing error handling throughout workflow...")

        # Placeholder identity, out-of-range confidence, empty reasoning,
        # no indicators, and unknown severity/urgency levels
        malformed_summary = ProviderSummary(
            patient_name="[Patient Name]",
            patient_phone="[Phone Number]",
            classification="RED",
            confidence=1.5,
            reasoning="",
            indicators=[],
            severity_level="INVALID",
            urgency_level="INVALID"
        )

        # Rendering must still produce non-empty text rather than raise
        rendered = self.display_manager.format_provider_summary_section(malformed_summary)

        assert isinstance(rendered, str)
        assert len(rendered) > 0
        # Accept either the normal heading or some validation notice
        has_heading = "Provider Summary" in rendered
        assert has_heading or "validation" in rendered.lower()

        # The validator must report errors, and the statistics must group them
        validation = self.error_handler.validate_provider_summary_structure(malformed_summary)
        error_stats = self.error_handler.get_error_statistics(validation.errors)

        assert error_stats["total"] > 0
        assert len(error_stats["by_category"]) > 0

        print("  ✅ Error handling throughout workflow passed!")
    
    def test_data_integrity_across_operations(self):
        """Check that patient data survives summary generation, display, paragraph, and export."""
        print("🧪 Testing data integrity across operations...")

        # Inputs whose values are tracked through each stage
        expected_indicators = ["loss of meaning", "spiritual distress", "questioning faith"]
        expected_reasoning = "Patient expressing significant spiritual concerns"
        expected_confidence = 0.85

        # Stage 1: summary generation
        summary = self.summary_generator.generate_summary(
            indicators=expected_indicators,
            reasoning=expected_reasoning,
            confidence=expected_confidence,
            patient_name=self.test_patient_name,
            patient_phone=self.test_patient_phone
        )

        assert summary.patient_name == self.test_patient_name
        assert summary.patient_phone == self.test_patient_phone
        assert summary.confidence == expected_confidence
        for indicator in expected_indicators:
            assert indicator in summary.indicators

        # Stage 2: HTML display section
        display_html = self.display_manager.format_provider_summary_section(summary)

        for fragment in (self.test_patient_name, self.test_patient_phone):
            assert fragment in display_html

        # Stage 3: coherent paragraph
        coherent_paragraph = self.summary_generator.format_coherent_paragraph(summary)

        for fragment in (self.test_patient_name, self.test_patient_phone):
            assert fragment in coherent_paragraph

        # Stage 4: dictionary export
        export_data = summary.to_dict()

        assert export_data["patient_name"] == self.test_patient_name
        assert export_data["patient_phone"] == self.test_patient_phone
        assert export_data["confidence"] == expected_confidence

        print("  ✅ Data integrity across operations passed!")
    
    def test_performance_with_multiple_records(self):
        """Run session creation and CSV export against a ten-exchange conversation."""
        print("🧪 Testing performance with multiple records...")

        conversation_log = ConversationLogger(patient_name="Performance Test Patient")

        # Log 10 exchanges; every third one (i = 0, 3, 6, 9) is RED, the rest YELLOW
        for i in range(10):
            if i % 3 == 0:
                state = SpiritualState.RED
            else:
                state = SpiritualState.YELLOW

            conversation_log.log_exchange(
                f"User message {i}: I'm having concerns about my situation",
                f"Assistant response {i}: I understand your concerns",
                SpiritualAssessment(
                    state=state,
                    confidence=0.7 + (i * 0.02),
                    indicators=[f"indicator_{i}", f"concern_{i}"],
                    reasoning=f"Test reasoning for exchange {i}"
                )
            )

        # One verification record is expected per logged exchange
        verification_session = self.verification_manager.create_verification_session(
            conversation_log,
            enable_enhanced_formats=True
        )

        assert len(verification_session.verification_records) == 10

        # Every record must carry an enhanced display format
        records_with_enhanced_format = [
            record for record in verification_session.verification_records
            if record.enhanced_display_format is not None
        ]

        assert len(records_with_enhanced_format) == 10

        # Export the session, then read the file back
        from src.core.verification_exporter import EnhancedVerificationExporter
        exporter = EnhancedVerificationExporter(self.temp_dir)

        csv_path = exporter.export_session_to_csv(
            verification_session,
            include_enhanced_data=True
        )

        assert os.path.exists(csv_path)

        with open(csv_path, 'r', encoding='utf-8') as f:
            csv_content = f.read()

        # Keep non-blank lines that do not start with '#'
        # (presumably '#' marks metadata lines in the export — confirm against the exporter)
        data_lines = [
            line for line in csv_content.split('\n')
            if line.strip() and not line.startswith('#')
        ]

        # At least header + 10 records
        assert len(data_lines) >= 11

        print("  ✅ Performance with multiple records passed!")
    
    def test_configuration_management_integration(self):
        """Check default, customised, and disabled display configurations."""
        print("🧪 Testing configuration management integration...")

        # Default configuration: must be enabled and expose a use_icons attribute
        # (its value is not asserted because it may vary with the configuration file)
        default_config = get_enhanced_display_config()
        assert default_config.enabled is True
        assert hasattr(default_config, 'use_icons')

        # Custom configuration: icons and visual separators switched off
        plain_manager = EnhancedResultsDisplayManager(
            config=EnhancedDisplayConfig(
                enabled=True,
                use_icons=False,
                use_visual_separators=False
            )
        )

        assert plain_manager.config.use_icons is False
        assert plain_manager.config.use_visual_separators is False

        plain_output = plain_manager.format_ai_analysis_section(
            classification="RED",
            indicators=["test indicator"],
            reasoning="test reasoning"
        )

        assert isinstance(plain_output, str)
        assert len(plain_output) > 0

        # Disabled configuration: output must still be a string with the section heading
        disabled_manager = EnhancedResultsDisplayManager(
            config=EnhancedDisplayConfig(enabled=False)
        )

        fallback_output = disabled_manager.format_ai_analysis_section(
            classification="RED",
            indicators=["test indicator"],
            reasoning="test reasoning"
        )

        assert isinstance(fallback_output, str)
        assert "AI Analysis" in fallback_output

        print("  ✅ Configuration management integration passed!")


def run_integration_checkpoint():
    """
    Run the integration checkpoint tests outside of pytest and print a summary.

    Each test runs between ``setup_method`` and ``teardown_method`` on one
    shared test instance. Once setup has succeeded, teardown always runs, even
    when the test raises, so a failing test does not leave its temporary
    directory behind. Any exception from setup, the test, or teardown counts
    the test as failed and prints its traceback.

    Returns:
        bool: True when no test failed, False otherwise.
    """
    import traceback

    print("🚀 Running UI Classification Improvements Integration Checkpoint")
    print("=" * 70)

    # Create test instance
    test_instance = TestUIClassificationImprovementsIntegration()

    tests = [
        ("Complete Workflow Integration", test_instance.test_complete_workflow_integration),
        ("Classification Consistency", test_instance.test_classification_consistency_validation),
        ("Error Handling Throughout Workflow", test_instance.test_error_handling_throughout_workflow),
        ("Data Integrity Across Operations", test_instance.test_data_integrity_across_operations),
        ("Performance with Multiple Records", test_instance.test_performance_with_multiple_records),
        ("Configuration Management", test_instance.test_configuration_management_integration)
    ]

    passed = 0
    failed = 0

    for test_name, test_func in tests:
        print(f"\n🧪 Running: {test_name}")
        try:
            test_instance.setup_method()
            try:
                test_func()
            finally:
                # Teardown only runs after a successful setup, and runs whether
                # the test passed or raised.
                test_instance.teardown_method()
        except Exception as e:
            print(f"  ❌ {test_name} FAILED: {e}")
            failed += 1
            traceback.print_exc()
        else:
            print(f"  ✅ {test_name} PASSED")
            passed += 1

    # Guard the division so an empty test list reports 0.0% instead of raising
    total = passed + failed
    success_rate = (passed / total) * 100 if total else 0.0

    print("\n" + "=" * 70)
    print("📊 INTEGRATION CHECKPOINT RESULTS")
    print("=" * 70)
    print(f"✅ Tests Passed: {passed}")
    print(f"❌ Tests Failed: {failed}")
    print(f"📈 Success Rate: {success_rate:.1f}%")

    if failed == 0:
        print("\n🎉 ALL INTEGRATION TESTS PASSED!")
        print("✅ All components work together correctly")
        print("✅ Full workflow from chat to verification validated")
        print("✅ Data integrity maintained across all operations")
        print("✅ Error handling and recovery working properly")
        print("✅ System ready for production use")
        return True
    else:
        print(f"\n⚠️  {failed} integration tests failed")
        print("❌ System requires fixes before production use")
        return False


if __name__ == "__main__":
    # Script entry point: exit status 0 when every test passed, 1 otherwise.
    success = run_integration_checkpoint()
    # The bare `exit` helper is injected by the `site` module for interactive
    # use and is missing under `python -S`; raising SystemExit does not depend on it.
    raise SystemExit(0 if success else 1)