""" Final integration tests for verification mode UI polish. Tests that verify: - All UI components render correctly - All buttons and interactions work as expected - CSV download functionality works end-to-end - Verification mode integrates seamlessly with existing interface Requirements: All """ import pytest import tempfile import os from datetime import datetime from unittest.mock import Mock, patch, MagicMock from src.interface.gradio_app import create_simplified_interface from src.interface.verification_ui import VerificationUIComponents from src.core.verification_models import ( VerificationSession, VerificationRecord, TestMessage, TestDataset, ) from src.core.test_datasets import TestDatasetManager from src.core.verification_store import JSONVerificationStore from src.core.verification_csv_exporter import VerificationCSVExporter class TestVerificationModeIntegration: """Test verification mode integration with main interface.""" def test_gradio_app_creates_successfully(self): """Test that Gradio app can be created without errors.""" try: interface = create_simplified_interface() assert interface is not None assert hasattr(interface, 'launch') except Exception as e: pytest.fail(f"Failed to create Gradio interface: {str(e)}") def test_verification_tab_exists_in_interface(self): """Test that verification tab is present in the interface.""" try: interface = create_simplified_interface() # Check that the interface has tabs assert hasattr(interface, 'blocks') except Exception as e: pytest.fail(f"Failed to verify tab structure: {str(e)}") def test_all_ui_components_render_correctly(self): """Test that all verification UI components render without errors.""" # Dataset selector dataset_selector = VerificationUIComponents.create_dataset_selector_component() assert dataset_selector is not None # Message review components message_text, decision_badge, confidence, indicators = ( VerificationUIComponents.create_message_review_component() ) assert message_text is not None assert decision_badge is not None assert confidence is not None assert indicators is not None # Feedback buttons correct_btn, incorrect_btn = VerificationUIComponents.create_feedback_buttons() assert correct_btn is not None assert incorrect_btn is not None # Correction selector correction_selector, notes_field = VerificationUIComponents.create_correction_selector() assert correction_selector is not None assert notes_field is not None # Progress display progress = VerificationUIComponents.create_progress_display() assert progress is not None # Statistics panel correct_count, incorrect_count, accuracy = ( VerificationUIComponents.create_statistics_panel() ) assert correct_count is not None assert incorrect_count is not None assert accuracy is not None # Breakdown by type breakdown = VerificationUIComponents.create_breakdown_by_type_component() assert breakdown is not None # Summary card summary = VerificationUIComponents.create_summary_card_component() assert summary is not None def test_dataset_selector_has_valid_options(self): """Test that dataset selector has valid dataset options.""" datasets = TestDatasetManager.get_dataset_list() assert len(datasets) > 0 for dataset in datasets: assert 'name' in dataset assert 'dataset_id' in dataset assert 'message_count' in dataset assert dataset['message_count'] > 0 def test_message_review_rendering_with_real_data(self): """Test message review rendering with real dataset data.""" # Load a real dataset datasets = TestDatasetManager.get_dataset_list() dataset = TestDatasetManager.load_dataset(datasets[0]['dataset_id']) # Get first message message = dataset.messages[0] # Render message review message_text, decision_badge, confidence, indicators = ( VerificationUIComponents.render_message_review( message, message.pre_classified_label, 0.85, ["Indicator 1", "Indicator 2"] ) ) assert message_text == message.text assert "🟢" in decision_badge or "🟡" in decision_badge or "🔴" in decision_badge assert "%" in confidence # The implementation uses comma-separated format with "Detected:" prefix assert "Indicator 1" in indicators and "Indicator 2" in indicators def test_classifier_decision_badge_all_types(self): """Test classifier decision badge for all classification types.""" for classification_type in ["green", "yellow", "red"]: badge = VerificationUIComponents.get_classifier_decision_badge(classification_type) assert badge is not None assert len(badge) > 0 # Check for emoji if classification_type == "green": assert "🟢" in badge elif classification_type == "yellow": assert "🟡" in badge elif classification_type == "red": assert "🔴" in badge def test_confidence_formatting_edge_cases(self): """Test confidence formatting with edge cases.""" # Test 0% confidence formatted = VerificationUIComponents.format_confidence_percentage(0.0) assert "0%" in formatted # Test 100% confidence formatted = VerificationUIComponents.format_confidence_percentage(1.0) assert "100%" in formatted # Test 50% confidence formatted = VerificationUIComponents.format_confidence_percentage(0.5) assert "50%" in formatted # Test rounding formatted = VerificationUIComponents.format_confidence_percentage(0.856) assert "86%" in formatted def test_indicators_formatting_empty_list(self): """Test indicators formatting with empty list.""" formatted = VerificationUIComponents.format_indicators_as_bullets([]) # The implementation returns "No specific indicators" for empty list assert "No specific indicators" in formatted or "no indicators" in formatted.lower() def test_indicators_formatting_multiple_items(self): """Test indicators formatting with multiple items.""" indicators = ["Anxiety", "Stress", "Worry"] formatted = VerificationUIComponents.format_indicators_as_bullets(indicators) # The implementation uses comma-separated format with "Detected:" prefix for indicator in indicators: assert indicator in formatted assert "Detected" in formatted def test_progress_display_accuracy(self): """Test progress display accuracy.""" # Test first message progress = VerificationUIComponents.update_progress_display(0, 10) assert "1 of 10" in progress # Test middle message progress = VerificationUIComponents.update_progress_display(5, 10) assert "6 of 10" in progress # Test last message progress = VerificationUIComponents.update_progress_display(9, 10) assert "10 of 10" in progress def test_statistics_display_accuracy_calculation(self): """Test statistics display accuracy calculation.""" # Test with 3 correct out of 5 correct_str, incorrect_str, accuracy_str = ( VerificationUIComponents.update_statistics_display(3, 2) ) assert "3" in correct_str assert "2" in incorrect_str assert "60" in accuracy_str # 3/5 = 60% def test_statistics_display_zero_messages(self): """Test statistics display with zero messages.""" correct_str, incorrect_str, accuracy_str = ( VerificationUIComponents.update_statistics_display(0, 0) ) assert "0" in correct_str assert "0" in incorrect_str # Zero messages shows "No verifications yet" message assert "0" in accuracy_str or "No verifications" in accuracy_str def test_breakdown_by_type_display(self): """Test breakdown by type display.""" # Create sample records records = [ VerificationRecord( message_id="1", original_message="Test", classifier_decision="green", classifier_confidence=0.9, classifier_indicators=[], ground_truth_label="green", verifier_notes="", is_correct=True, ), VerificationRecord( message_id="2", original_message="Test", classifier_decision="yellow", classifier_confidence=0.8, classifier_indicators=[], ground_truth_label="yellow", verifier_notes="", is_correct=True, ), VerificationRecord( message_id="3", original_message="Test", classifier_decision="red", classifier_confidence=0.95, classifier_indicators=[], ground_truth_label="red", verifier_notes="", is_correct=True, ), ] breakdown = VerificationUIComponents.update_breakdown_by_type(records) assert "🟢" in breakdown assert "🟡" in breakdown assert "🔴" in breakdown assert "1 correct" in breakdown def test_summary_card_rendering(self): """Test summary card rendering with real session data.""" # Create a session with records session = VerificationSession( session_id="test-session", verifier_name="Test Verifier", dataset_id="test-dataset", dataset_name="Test Dataset", total_messages=5, message_queue=["1", "2", "3", "4", "5"], ) records = [ VerificationRecord( message_id="1", original_message="Test", classifier_decision="green", classifier_confidence=0.9, classifier_indicators=[], ground_truth_label="green", verifier_notes="", is_correct=True, ), VerificationRecord( message_id="2", original_message="Test", classifier_decision="yellow", classifier_confidence=0.8, classifier_indicators=[], ground_truth_label="red", verifier_notes="Missed indicators", is_correct=False, ), ] session.verifications = records session.verified_count = 2 session.correct_count = 1 session.incorrect_count = 1 summary = VerificationUIComponents.render_summary_card(session, records) assert "Test Dataset" in summary assert "2" in summary # Total messages reviewed assert "1" in summary # Correct count assert "50" in summary # Accuracy percentage def test_csv_export_end_to_end(self): """Test CSV export functionality end-to-end.""" # Create a session with records session = VerificationSession( session_id="test-session", verifier_name="Test Verifier", dataset_id="test-dataset", dataset_name="Test Dataset", total_messages=3, message_queue=["1", "2", "3"], ) records = [ VerificationRecord( message_id="1", original_message="I'm feeling anxious", classifier_decision="yellow", classifier_confidence=0.85, classifier_indicators=["Anxiety"], ground_truth_label="yellow", verifier_notes="", is_correct=True, ), VerificationRecord( message_id="2", original_message="I want to end it all", classifier_decision="red", classifier_confidence=0.95, classifier_indicators=["Suicidal ideation"], ground_truth_label="red", verifier_notes="", is_correct=True, ), VerificationRecord( message_id="3", original_message="I'm fine", classifier_decision="green", classifier_confidence=0.9, classifier_indicators=[], ground_truth_label="yellow", verifier_notes="False negative", is_correct=False, ), ] session.verifications = records session.verified_count = 3 session.correct_count = 2 session.incorrect_count = 1 # Generate CSV csv_content = VerificationCSVExporter.generate_csv_content(session) assert csv_content is not None assert len(csv_content) > 0 assert "Patient Message" in csv_content assert "Classifier Said" in csv_content assert "You Said" in csv_content assert "I'm feeling anxious" in csv_content assert "I want to end it all" in csv_content assert "I'm fine" in csv_content assert "Total Messages" in csv_content assert "Accuracy" in csv_content def test_csv_filename_generation(self): """Test CSV filename generation.""" filename = VerificationCSVExporter.generate_csv_filename() assert filename is not None assert "verification_results" in filename assert ".csv" in filename # Check date format today = datetime.now().strftime("%Y-%m-%d") assert today in filename def test_session_persistence_and_resumption(self): """Test session persistence and resumption.""" store = JSONVerificationStore() # Create and save a session session = VerificationSession( session_id="test-session", verifier_name="Test Verifier", dataset_id="test-dataset", dataset_name="Test Dataset", total_messages=5, message_queue=["1", "2", "3", "4", "5"], ) record = VerificationRecord( message_id="1", original_message="Test", classifier_decision="green", classifier_confidence=0.9, classifier_indicators=[], ground_truth_label="green", verifier_notes="", is_correct=True, ) session.verifications.append(record) session.verified_count = 1 session.correct_count = 1 # Save session store.save_session(session) # Load session loaded_session = store.load_session(session.session_id) assert loaded_session is not None assert loaded_session.session_id == session.session_id assert loaded_session.verified_count == 1 assert len(loaded_session.verifications) == 1 def test_completed_session_immutability(self): """Test that completed sessions cannot be modified.""" store = JSONVerificationStore() # Create and complete a session session = VerificationSession( session_id="test-session", verifier_name="Test Verifier", dataset_id="test-dataset", dataset_name="Test Dataset", total_messages=1, message_queue=["1"], ) session.is_complete = True session.completed_at = datetime.now() store.save_session(session) # Try to load and verify immutability loaded_session = store.load_session(session.session_id) assert loaded_session.is_complete is True # Verify that the session cannot be modified assert not store.can_modify_session(loaded_session) def test_error_handling_for_missing_feedback(self): """Test error handling for missing feedback.""" from src.core.verification_error_handler import VerificationErrorHandler, ErrorType error = VerificationErrorHandler.create_error( ErrorType.MISSING_FEEDBACK, "Please select if this was correct or incorrect" ) assert error is not None assert error.error_type == ErrorType.MISSING_FEEDBACK assert "correct or incorrect" in error.user_message def test_error_handling_for_missing_correction(self): """Test error handling for missing correction.""" from src.core.verification_error_handler import VerificationErrorHandler, ErrorType error = VerificationErrorHandler.create_error( ErrorType.MISSING_CORRECTION, "Please select a correction before submitting" ) assert error is not None assert error.error_type == ErrorType.MISSING_CORRECTION assert "classification" in error.user_message or "correction" in error.user_message def test_error_handling_for_csv_export_failure(self): """Test error handling for CSV export failure.""" from src.core.verification_error_handler import VerificationErrorHandler, ErrorType error = VerificationErrorHandler.create_error( ErrorType.CSV_EXPORT_FAILURE, "Download failed. Please try again." ) assert error is not None assert error.error_type == ErrorType.CSV_EXPORT_FAILURE assert "Download" in error.user_message def test_all_buttons_have_correct_variants(self): """Test that all buttons have correct visual variants.""" correct_btn, incorrect_btn = VerificationUIComponents.create_feedback_buttons() # Buttons should have different variants for visual distinction assert correct_btn is not None assert incorrect_btn is not None def test_dataset_metadata_display_accuracy(self): """Test dataset metadata display accuracy.""" datasets = TestDatasetManager.get_dataset_list() dataset = TestDatasetManager.load_dataset(datasets[0]['dataset_id']) metadata = VerificationUIComponents.render_dataset_metadata(dataset) assert dataset.name in metadata assert dataset.description in metadata assert str(dataset.message_count) in metadata def test_session_info_display_rendering(self): """Test session info display rendering.""" session = VerificationSession( session_id="test-session", verifier_name="Test Verifier", dataset_id="test-dataset", dataset_name="Test Dataset", total_messages=10, message_queue=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], ) session.verified_count = 5 session.correct_count = 4 info = VerificationUIComponents.render_session_info(session) assert "Test Dataset" in info assert "Test Verifier" in info assert "5/10" in info assert "80" in info # 4/5 = 80% def test_verification_workflow_state_transitions(self): """Test state transitions in verification workflow.""" # Create initial session session = VerificationSession( session_id="test-session", verifier_name="Test Verifier", dataset_id="test-dataset", dataset_name="Test Dataset", total_messages=2, message_queue=["1", "2"], ) assert session.verified_count == 0 assert session.is_complete is False # Add first verification record1 = VerificationRecord( message_id="1", original_message="Test 1", classifier_decision="green", classifier_confidence=0.9, classifier_indicators=[], ground_truth_label="green", verifier_notes="", is_correct=True, ) session.verifications.append(record1) session.verified_count = 1 session.correct_count = 1 assert session.verified_count == 1 assert session.is_complete is False # Add second verification record2 = VerificationRecord( message_id="2", original_message="Test 2", classifier_decision="yellow", classifier_confidence=0.8, classifier_indicators=[], ground_truth_label="yellow", verifier_notes="", is_correct=True, ) session.verifications.append(record2) session.verified_count = 2 session.correct_count = 2 # Mark as complete session.is_complete = True session.completed_at = datetime.now() assert session.verified_count == 2 assert session.is_complete is True assert len(session.verifications) == 2 class TestUIComponentsConsistency: """Test consistency of UI components across different states.""" def test_badge_colors_consistent(self): """Test that badge colors are consistent.""" green_badge = VerificationUIComponents.get_classifier_decision_badge("green") yellow_badge = VerificationUIComponents.get_classifier_decision_badge("yellow") red_badge = VerificationUIComponents.get_classifier_decision_badge("red") assert "🟢" in green_badge assert "🟡" in yellow_badge assert "🔴" in red_badge # Test case insensitivity green_badge_upper = VerificationUIComponents.get_classifier_decision_badge("GREEN") assert "🟢" in green_badge_upper def test_progress_display_format_consistency(self): """Test that progress display format is consistent.""" progress1 = VerificationUIComponents.update_progress_display(0, 5) progress2 = VerificationUIComponents.update_progress_display(2, 5) progress3 = VerificationUIComponents.update_progress_display(4, 5) # All should have the same format assert "Progress:" in progress1 assert "Progress:" in progress2 assert "Progress:" in progress3 assert "of" in progress1 assert "of" in progress2 assert "of" in progress3 def test_statistics_display_format_consistency(self): """Test that statistics display format is consistent.""" correct1, incorrect1, accuracy1 = ( VerificationUIComponents.update_statistics_display(1, 0) ) correct2, incorrect2, accuracy2 = ( VerificationUIComponents.update_statistics_display(2, 1) ) # All should have consistent format assert "Correct:" in correct1 assert "Correct:" in correct2 assert "Incorrect:" in incorrect1 assert "Incorrect:" in incorrect2 assert "Accuracy:" in accuracy1 assert "Accuracy:" in accuracy2