#!/usr/bin/env python3
"""
Integration tests for UI Classification Improvements.

Tests the complete workflow from chat to verification with all enhanced components
working together correctly. This validates task 10 requirements.

Requirements: 10.1 - Complete workflow testing
"""

import pytest
import tempfile
import os
import json
from datetime import datetime
from unittest.mock import Mock, patch

from src.interface.enhanced_results_display_manager import EnhancedResultsDisplayManager
from src.core.provider_summary_generator import ProviderSummary, ProviderSummaryGenerator
from src.core.improved_classification_prompt_manager import ImprovedClassificationPromptManager
from src.config.enhanced_display_config import EnhancedDisplayConfig, get_enhanced_display_config
from src.core.conversation_logger import ConversationLogger
from src.core.spiritual_state import SpiritualState, SpiritualAssessment
from src.core.conversation_verification import EnhancedConversationVerificationManager
from src.core.ui_error_handler import UIErrorHandler
class TestUIClassificationImprovementsIntegration:
    """
    Integration tests for UI Classification Improvements.

    Tests the complete workflow: Chat -> Classification -> Display -> Verification
    """

    def setup_method(self):
        """Set up a fresh component stack and temp workspace for each test."""
        # Each test gets its own scratch directory; removed in teardown_method.
        self.temp_dir = tempfile.mkdtemp()

        # Initialize all components under test.
        self.display_manager = EnhancedResultsDisplayManager()
        self.summary_generator = ProviderSummaryGenerator()
        self.classification_manager = ImprovedClassificationPromptManager()
        self.verification_manager = EnhancedConversationVerificationManager(self.temp_dir)
        self.error_handler = UIErrorHandler()

        # Shared test fixtures.
        self.test_patient_name = "Integration Test Patient"
        self.test_patient_phone = "555-0199"
| def teardown_method(self): | |
| """Clean up test environment.""" | |
| import shutil | |
| shutil.rmtree(self.temp_dir, ignore_errors=True) | |
| def create_test_conversation_logger(self) -> ConversationLogger: | |
| """Create a conversation logger with test data for integration testing.""" | |
| logger = ConversationLogger(patient_name=self.test_patient_name) | |
| # Simulate a conversation that should trigger RED classification | |
| test_exchanges = [ | |
| { | |
| "user_message": "I've been feeling really lost lately", | |
| "assistant_response": "I understand this can be a difficult time. Can you tell me more about what you're experiencing?", | |
| "classification": SpiritualState.YELLOW, | |
| "confidence": 0.65, | |
| "indicators": ["feeling lost", "emotional distress"], | |
| "reasoning": "Patient expressing feelings of being lost, requires further assessment" | |
| }, | |
| { | |
| "user_message": "I just don't see the point in anything anymore. My life feels meaningless since my diagnosis.", | |
| "assistant_response": "I hear that you're struggling with finding meaning right now. These feelings are understandable given what you're going through.", | |
| "classification": SpiritualState.RED, | |
| "confidence": 0.85, | |
| "indicators": ["loss of meaning", "existential distress", "questioning purpose"], | |
| "reasoning": "Patient expressing loss of meaning and purpose, which is an explicit red flag indicator" | |
| }, | |
| { | |
| "user_message": "Sometimes I wonder if there's any point to all this suffering", | |
| "assistant_response": "Your feelings about suffering are very important. Many people in your situation have similar questions.", | |
| "classification": SpiritualState.RED, | |
| "confidence": 0.90, | |
| "indicators": ["doubt about meaning of suffering", "existential questioning"], | |
| "reasoning": "Patient questioning meaning of suffering - explicit red flag indicator requiring immediate attention" | |
| } | |
| ] | |
| for exchange in test_exchanges: | |
| assessment = SpiritualAssessment( | |
| state=exchange["classification"], | |
| confidence=exchange["confidence"], | |
| indicators=exchange["indicators"], | |
| reasoning=exchange["reasoning"] | |
| ) | |
| logger.log_exchange( | |
| exchange["user_message"], | |
| exchange["assistant_response"], | |
| assessment | |
| ) | |
| return logger | |
| def test_complete_workflow_integration(self): | |
| """ | |
| Test the complete workflow from chat to verification. | |
| This is the main integration test that validates all components | |
| work together correctly. | |
| """ | |
| print("π§ͺ Testing complete UI Classification Improvements workflow...") | |
| # Step 1: Create conversation with enhanced classification | |
| print(" 1. Creating conversation with enhanced classification...") | |
| logger = self.create_test_conversation_logger() | |
| # Verify conversation was logged correctly | |
| assert len(logger.entries) == 3 | |
| assert logger.patient_name == self.test_patient_name | |
| # Step 2: Generate provider summary with enhanced features | |
| print(" 2. Generating enhanced provider summary...") | |
| # Get the RED flag assessment from the conversation | |
| red_assessment = None | |
| for entry in logger.entries: | |
| if entry.spiritual_classification == "RED": | |
| # Create SpiritualAssessment from entry data | |
| from src.core.spiritual_state import SpiritualState | |
| red_assessment = SpiritualAssessment( | |
| state=SpiritualState.RED, | |
| confidence=entry.classification_confidence, | |
| indicators=entry.classification_indicators, | |
| reasoning=entry.classification_reasoning | |
| ) | |
| break | |
| assert red_assessment is not None, "Should have RED flag assessment" | |
| # Generate enhanced provider summary | |
| summary = self.summary_generator.generate_summary( | |
| indicators=red_assessment.indicators, | |
| reasoning=red_assessment.reasoning, | |
| confidence=red_assessment.confidence, | |
| patient_name=self.test_patient_name, | |
| patient_phone=self.test_patient_phone, | |
| conversation_context="Patient expressing loss of meaning and questioning suffering", | |
| medical_context={ | |
| "age": 45, | |
| "gender": "individual", | |
| "conditions": ["chronic illness", "recent diagnosis"] | |
| } | |
| ) | |
| # Verify summary was generated correctly | |
| assert isinstance(summary, ProviderSummary) | |
| assert summary.patient_name == self.test_patient_name | |
| assert summary.patient_phone == self.test_patient_phone | |
| assert summary.classification == "RED" | |
| assert len(summary.indicators) > 0 | |
| # Step 3: Format with enhanced display manager | |
| print(" 3. Formatting with enhanced display manager...") | |
| # Test AI analysis section formatting | |
| ai_analysis_html = self.display_manager.format_ai_analysis_section( | |
| classification="RED", | |
| indicators=red_assessment.indicators, | |
| reasoning=red_assessment.reasoning, | |
| confidence=red_assessment.confidence | |
| ) | |
| assert isinstance(ai_analysis_html, str) | |
| assert len(ai_analysis_html) > 0 | |
| assert "AI Analysis" in ai_analysis_html | |
| assert "RED FLAG" in ai_analysis_html | |
| # Test patient message section formatting | |
| patient_message_html = self.display_manager.format_patient_message_section( | |
| "Sometimes I wonder if there's any point to all this suffering" | |
| ) | |
| assert isinstance(patient_message_html, str) | |
| assert "Patient Message" in patient_message_html | |
| assert "suffering" in patient_message_html | |
| # Test provider summary section formatting | |
| provider_summary_html = self.display_manager.format_provider_summary_section(summary) | |
| assert isinstance(provider_summary_html, str) | |
| assert "Provider Summary" in provider_summary_html | |
| assert self.test_patient_name in provider_summary_html | |
| assert self.test_patient_phone in provider_summary_html | |
| # Step 4: Test coherent paragraph formatting | |
| print(" 4. Testing coherent paragraph formatting...") | |
| coherent_paragraph = self.summary_generator.format_coherent_paragraph(summary) | |
| assert isinstance(coherent_paragraph, str) | |
| assert len(coherent_paragraph) > 50 # Should be substantial | |
| assert self.test_patient_name in coherent_paragraph | |
| assert "45-year-old" in coherent_paragraph or "individual" in coherent_paragraph | |
| assert "RED FLAG" in coherent_paragraph | |
| # Step 5: Test combined results formatting | |
| print(" 5. Testing combined results formatting...") | |
| combined_html = self.display_manager.format_combined_results( | |
| ai_analysis={ | |
| 'classification': 'RED', | |
| 'indicators': red_assessment.indicators, | |
| 'reasoning': red_assessment.reasoning, | |
| 'confidence': red_assessment.confidence | |
| }, | |
| patient_message="Sometimes I wonder if there's any point to all this suffering", | |
| provider_summary=summary | |
| ) | |
| assert isinstance(combined_html, str) | |
| assert "AI Analysis" in combined_html | |
| assert "Patient Message" in combined_html | |
| assert "Provider Summary" in combined_html | |
| # Step 6: Test verification system integration | |
| print(" 6. Testing verification system integration...") | |
| verification_session = self.verification_manager.create_verification_session( | |
| logger, | |
| verifier_name="Integration Test", | |
| enable_enhanced_formats=True | |
| ) | |
| assert verification_session is not None | |
| assert verification_session.enhanced_format_enabled is True | |
| assert len(verification_session.verification_records) == 3 | |
| # Verify enhanced formats are applied | |
| for record in verification_session.verification_records: | |
| assert record.enhanced_display_format is not None | |
| assert record.visual_sections is not None | |
| # Step 7: Test CSV export with enhanced data | |
| print(" 7. Testing CSV export with enhanced data...") | |
| from src.core.verification_exporter import EnhancedVerificationExporter | |
| exporter = EnhancedVerificationExporter(self.temp_dir) | |
| csv_path = exporter.export_session_to_csv( | |
| verification_session, | |
| include_enhanced_data=True | |
| ) | |
| assert os.path.exists(csv_path) | |
| with open(csv_path, 'r', encoding='utf-8') as f: | |
| csv_content = f.read() | |
| # Verify enhanced data is in CSV | |
| assert 'has_enhanced_display' in csv_content | |
| assert 'enhanced_indicators_count' in csv_content | |
| assert self.test_patient_name in csv_content | |
| print(" β Complete workflow integration test passed!") | |
| def test_classification_consistency_validation(self): | |
| """Test that classification consistency is maintained throughout workflow.""" | |
| print("π§ͺ Testing classification consistency validation...") | |
| # Test explicit red indicators | |
| explicit_red_indicators = self.classification_manager.get_explicit_red_indicators() | |
| assert "Complex grief" in explicit_red_indicators | |
| assert "Loss of a loved one" in explicit_red_indicators | |
| assert "Doubt about meaning of life" in explicit_red_indicators | |
| assert "Doubt about meaning of suffering" in explicit_red_indicators | |
| assert "Doubt about personal dignity" in explicit_red_indicators | |
| # Test classification validation | |
| test_result = self.classification_manager.create_classification_result( | |
| classification="red", | |
| confidence=0.85, | |
| indicators=["doubt about meaning of suffering"], | |
| reasoning="Patient questioning meaning of suffering", | |
| red_flag_indicators=["doubt about meaning of suffering"] | |
| ) | |
| assert test_result.is_valid is True | |
| assert test_result.classification == "red" | |
| # Test invalid classification gets corrected | |
| invalid_result = self.classification_manager.create_classification_result( | |
| classification="invalid", | |
| confidence=2.0, | |
| indicators=[], | |
| reasoning="" | |
| ) | |
| assert invalid_result.classification in ["red", "yellow", "green"] | |
| assert 0.0 <= invalid_result.confidence <= 1.0 | |
| assert len(invalid_result.indicators) > 0 | |
| print(" β Classification consistency validation passed!") | |
| def test_error_handling_throughout_workflow(self): | |
| """Test error handling and recovery throughout the complete workflow.""" | |
| print("π§ͺ Testing error handling throughout workflow...") | |
| # Test with problematic data | |
| problematic_summary = ProviderSummary( | |
| patient_name="[Patient Name]", # Placeholder | |
| patient_phone="[Phone Number]", # Placeholder | |
| classification="RED", | |
| confidence=1.5, # Invalid confidence | |
| reasoning="", # Empty reasoning | |
| indicators=[], # No indicators | |
| severity_level="INVALID", | |
| urgency_level="INVALID" | |
| ) | |
| # Display manager should handle this gracefully | |
| display_result = self.display_manager.format_provider_summary_section(problematic_summary) | |
| assert isinstance(display_result, str) | |
| assert len(display_result) > 0 | |
| # Should contain validation warnings or fallback content | |
| assert "Provider Summary" in display_result or "validation" in display_result.lower() | |
| # Test error statistics collection | |
| validation_result = self.error_handler.validate_provider_summary_structure(problematic_summary) | |
| stats = self.error_handler.get_error_statistics(validation_result.errors) | |
| assert stats["total"] > 0 | |
| assert len(stats["by_category"]) > 0 | |
| print(" β Error handling throughout workflow passed!") | |
| def test_data_integrity_across_operations(self): | |
| """Test that data integrity is maintained across all operations.""" | |
| print("π§ͺ Testing data integrity across operations...") | |
| # Create test data | |
| original_indicators = ["loss of meaning", "spiritual distress", "questioning faith"] | |
| original_reasoning = "Patient expressing significant spiritual concerns" | |
| original_confidence = 0.85 | |
| # Generate summary | |
| summary = self.summary_generator.generate_summary( | |
| indicators=original_indicators, | |
| reasoning=original_reasoning, | |
| confidence=original_confidence, | |
| patient_name=self.test_patient_name, | |
| patient_phone=self.test_patient_phone | |
| ) | |
| # Verify data integrity in summary | |
| assert summary.patient_name == self.test_patient_name | |
| assert summary.patient_phone == self.test_patient_phone | |
| assert summary.confidence == original_confidence | |
| assert all(indicator in summary.indicators for indicator in original_indicators) | |
| # Format for display | |
| display_html = self.display_manager.format_provider_summary_section(summary) | |
| # Verify data integrity in display | |
| assert self.test_patient_name in display_html | |
| assert self.test_patient_phone in display_html | |
| # Format coherent paragraph | |
| coherent_paragraph = self.summary_generator.format_coherent_paragraph(summary) | |
| # Verify data integrity in coherent paragraph | |
| assert self.test_patient_name in coherent_paragraph | |
| assert self.test_patient_phone in coherent_paragraph | |
| # Export for verification | |
| export_data = summary.to_dict() | |
| # Verify data integrity in export | |
| assert export_data["patient_name"] == self.test_patient_name | |
| assert export_data["patient_phone"] == self.test_patient_phone | |
| assert export_data["confidence"] == original_confidence | |
| print(" β Data integrity across operations passed!") | |
| def test_performance_with_multiple_records(self): | |
| """Test performance and stability with multiple conversation records.""" | |
| print("π§ͺ Testing performance with multiple records...") | |
| # Create logger with multiple exchanges | |
| logger = ConversationLogger(patient_name="Performance Test Patient") | |
| # Add 10 exchanges to test performance | |
| for i in range(10): | |
| assessment = SpiritualAssessment( | |
| state=SpiritualState.RED if i % 3 == 0 else SpiritualState.YELLOW, | |
| confidence=0.7 + (i * 0.02), | |
| indicators=[f"indicator_{i}", f"concern_{i}"], | |
| reasoning=f"Test reasoning for exchange {i}" | |
| ) | |
| logger.log_exchange( | |
| f"User message {i}: I'm having concerns about my situation", | |
| f"Assistant response {i}: I understand your concerns", | |
| assessment | |
| ) | |
| # Test verification session creation with multiple records | |
| verification_session = self.verification_manager.create_verification_session( | |
| logger, | |
| enable_enhanced_formats=True | |
| ) | |
| assert len(verification_session.verification_records) == 10 | |
| # Test that all records have enhanced formats | |
| enhanced_count = sum( | |
| 1 for record in verification_session.verification_records | |
| if record.enhanced_display_format is not None | |
| ) | |
| assert enhanced_count == 10 | |
| # Test CSV export performance | |
| from src.core.verification_exporter import EnhancedVerificationExporter | |
| exporter = EnhancedVerificationExporter(self.temp_dir) | |
| csv_path = exporter.export_session_to_csv( | |
| verification_session, | |
| include_enhanced_data=True | |
| ) | |
| assert os.path.exists(csv_path) | |
| # Verify CSV contains all records | |
| with open(csv_path, 'r', encoding='utf-8') as f: | |
| csv_content = f.read() | |
| # Should have header + metadata + 10 data rows | |
| lines = [line for line in csv_content.split('\n') if line.strip()] | |
| data_lines = [line for line in lines if not line.startswith('#')] | |
| # At least header + 10 records | |
| assert len(data_lines) >= 11 | |
| print(" β Performance with multiple records passed!") | |
| def test_configuration_management_integration(self): | |
| """Test that configuration management works correctly across components.""" | |
| print("π§ͺ Testing configuration management integration...") | |
| # Test default configuration | |
| default_config = get_enhanced_display_config() | |
| assert default_config.enabled is True | |
| # Note: use_icons default may vary based on configuration file | |
| assert hasattr(default_config, 'use_icons') | |
| # Test custom configuration | |
| custom_config = EnhancedDisplayConfig( | |
| enabled=True, | |
| use_icons=False, | |
| use_visual_separators=False | |
| ) | |
| custom_display_manager = EnhancedResultsDisplayManager(config=custom_config) | |
| # Test that custom config is applied | |
| assert custom_display_manager.config.use_icons is False | |
| assert custom_display_manager.config.use_visual_separators is False | |
| # Test formatting with custom config | |
| result = custom_display_manager.format_ai_analysis_section( | |
| classification="RED", | |
| indicators=["test indicator"], | |
| reasoning="test reasoning" | |
| ) | |
| assert isinstance(result, str) | |
| assert len(result) > 0 | |
| # Test disabled mode | |
| disabled_config = EnhancedDisplayConfig(enabled=False) | |
| disabled_display_manager = EnhancedResultsDisplayManager(config=disabled_config) | |
| result = disabled_display_manager.format_ai_analysis_section( | |
| classification="RED", | |
| indicators=["test indicator"], | |
| reasoning="test reasoning" | |
| ) | |
| # Should fall back to basic formatting | |
| assert isinstance(result, str) | |
| assert "AI Analysis" in result | |
| print(" β Configuration management integration passed!") | |
def run_integration_checkpoint():
    """
    Run the integration checkpoint tests.

    This function runs all integration tests and provides a summary
    of the results for task 10 validation.

    Returns:
        bool: True when every integration test passed, False otherwise.
    """
    print("🚀 Running UI Classification Improvements Integration Checkpoint")
    print("=" * 70)

    # Create test instance
    test_instance = TestUIClassificationImprovementsIntegration()

    tests = [
        ("Complete Workflow Integration", test_instance.test_complete_workflow_integration),
        ("Classification Consistency", test_instance.test_classification_consistency_validation),
        ("Error Handling Throughout Workflow", test_instance.test_error_handling_throughout_workflow),
        ("Data Integrity Across Operations", test_instance.test_data_integrity_across_operations),
        ("Performance with Multiple Records", test_instance.test_performance_with_multiple_records),
        ("Configuration Management", test_instance.test_configuration_management_integration)
    ]

    passed = 0
    failed = 0

    for test_name, test_func in tests:
        try:
            print(f"\n🧪 Running: {test_name}")
            test_instance.setup_method()
            try:
                test_func()
            finally:
                # Clean up the temp workspace even when the test raises,
                # so a failing test does not leak directories.
                test_instance.teardown_method()
            print(f" ✅ {test_name} PASSED")
            passed += 1
        except Exception as e:
            print(f" ❌ {test_name} FAILED: {e}")
            failed += 1
            import traceback
            traceback.print_exc()

    total = passed + failed
    # Guard against division by zero if the test list is ever emptied.
    success_rate = (passed / total) * 100 if total else 0.0

    print("\n" + "=" * 70)
    print("📊 INTEGRATION CHECKPOINT RESULTS")
    print("=" * 70)
    print(f"✅ Tests Passed: {passed}")
    print(f"❌ Tests Failed: {failed}")
    print(f"📈 Success Rate: {success_rate:.1f}%")

    if failed == 0:
        print("\n🎉 ALL INTEGRATION TESTS PASSED!")
        print("✅ All components work together correctly")
        print("✅ Full workflow from chat to verification validated")
        print("✅ Data integrity maintained across all operations")
        print("✅ Error handling and recovery working properly")
        print("✅ System ready for production use")
        return True
    else:
        print(f"\n⚠️ {failed} integration tests failed")
        print("❌ System requires fixes before production use")
        return False
if __name__ == "__main__":
    success = run_integration_checkpoint()
    # Raise SystemExit instead of calling the site-injected exit() helper,
    # which is not guaranteed to exist (e.g. under `python -S` or frozen apps).
    raise SystemExit(0 if success else 1)