# NOTE: removed stray "Spaces / Sleeping" page-status artifacts captured with the original file.
"""
Final integration tests for verification mode UI polish.

Tests that verify:
- All UI components render correctly
- All buttons and interactions work as expected
- CSV download functionality works end-to-end
- Verification mode integrates seamlessly with existing interface

Requirements: All
"""

import os
import tempfile
from datetime import datetime
from unittest.mock import MagicMock, Mock, patch

import pytest

from src.core.test_datasets import TestDatasetManager
from src.core.verification_csv_exporter import VerificationCSVExporter
from src.core.verification_models import (
    TestDataset,
    TestMessage,
    VerificationRecord,
    VerificationSession,
)
from src.core.verification_store import JSONVerificationStore
from src.interface.gradio_app import create_simplified_interface
from src.interface.verification_ui import VerificationUIComponents
class TestVerificationModeIntegration:
    """Test verification mode integration with main interface."""

    def test_gradio_app_creates_successfully(self):
        """Test that Gradio app can be created without errors."""
        try:
            interface = create_simplified_interface()
            assert interface is not None
            assert hasattr(interface, 'launch')
        except Exception as e:
            pytest.fail(f"Failed to create Gradio interface: {str(e)}")

    def test_verification_tab_exists_in_interface(self):
        """Test that verification tab is present in the interface."""
        try:
            interface = create_simplified_interface()
            # Check that the interface has tabs
            assert hasattr(interface, 'blocks')
        except Exception as e:
            pytest.fail(f"Failed to verify tab structure: {str(e)}")

    def test_all_ui_components_render_correctly(self):
        """Test that all verification UI components render without errors."""
        # Dataset selector
        dataset_selector = VerificationUIComponents.create_dataset_selector_component()
        assert dataset_selector is not None

        # Message review components
        message_text, decision_badge, confidence, indicators = (
            VerificationUIComponents.create_message_review_component()
        )
        assert message_text is not None
        assert decision_badge is not None
        assert confidence is not None
        assert indicators is not None

        # Feedback buttons
        correct_btn, incorrect_btn = VerificationUIComponents.create_feedback_buttons()
        assert correct_btn is not None
        assert incorrect_btn is not None

        # Correction selector
        correction_selector, notes_field = VerificationUIComponents.create_correction_selector()
        assert correction_selector is not None
        assert notes_field is not None

        # Progress display
        progress = VerificationUIComponents.create_progress_display()
        assert progress is not None

        # Statistics panel
        correct_count, incorrect_count, accuracy = (
            VerificationUIComponents.create_statistics_panel()
        )
        assert correct_count is not None
        assert incorrect_count is not None
        assert accuracy is not None

        # Breakdown by type
        breakdown = VerificationUIComponents.create_breakdown_by_type_component()
        assert breakdown is not None

        # Summary card
        summary = VerificationUIComponents.create_summary_card_component()
        assert summary is not None

    def test_dataset_selector_has_valid_options(self):
        """Test that dataset selector has valid dataset options."""
        datasets = TestDatasetManager.get_dataset_list()
        assert len(datasets) > 0
        for dataset in datasets:
            assert 'name' in dataset
            assert 'dataset_id' in dataset
            assert 'message_count' in dataset
            assert dataset['message_count'] > 0

    def test_message_review_rendering_with_real_data(self):
        """Test message review rendering with real dataset data."""
        # Load a real dataset
        datasets = TestDatasetManager.get_dataset_list()
        dataset = TestDatasetManager.load_dataset(datasets[0]['dataset_id'])

        # Get first message
        message = dataset.messages[0]

        # Render message review
        message_text, decision_badge, confidence, indicators = (
            VerificationUIComponents.render_message_review(
                message,
                message.pre_classified_label,
                0.85,
                ["Indicator 1", "Indicator 2"]
            )
        )
        assert message_text == message.text
        assert "🟢" in decision_badge or "🟡" in decision_badge or "🔴" in decision_badge
        assert "%" in confidence
        # The implementation uses comma-separated format with "Detected:" prefix
        assert "Indicator 1" in indicators and "Indicator 2" in indicators

    def test_classifier_decision_badge_all_types(self):
        """Test classifier decision badge for all classification types."""
        for classification_type in ["green", "yellow", "red"]:
            badge = VerificationUIComponents.get_classifier_decision_badge(classification_type)
            assert badge is not None
            assert len(badge) > 0
            # Check for emoji
            if classification_type == "green":
                assert "🟢" in badge
            elif classification_type == "yellow":
                assert "🟡" in badge
            elif classification_type == "red":
                assert "🔴" in badge

    def test_confidence_formatting_edge_cases(self):
        """Test confidence formatting with edge cases."""
        # Test 0% confidence
        formatted = VerificationUIComponents.format_confidence_percentage(0.0)
        assert "0%" in formatted

        # Test 100% confidence
        formatted = VerificationUIComponents.format_confidence_percentage(1.0)
        assert "100%" in formatted

        # Test 50% confidence
        formatted = VerificationUIComponents.format_confidence_percentage(0.5)
        assert "50%" in formatted

        # Test rounding
        formatted = VerificationUIComponents.format_confidence_percentage(0.856)
        assert "86%" in formatted

    def test_indicators_formatting_empty_list(self):
        """Test indicators formatting with empty list."""
        formatted = VerificationUIComponents.format_indicators_as_bullets([])
        # The implementation returns "No specific indicators" for empty list
        assert "No specific indicators" in formatted or "no indicators" in formatted.lower()

    def test_indicators_formatting_multiple_items(self):
        """Test indicators formatting with multiple items."""
        indicators = ["Anxiety", "Stress", "Worry"]
        formatted = VerificationUIComponents.format_indicators_as_bullets(indicators)
        # The implementation uses comma-separated format with "Detected:" prefix
        for indicator in indicators:
            assert indicator in formatted
        assert "Detected" in formatted

    def test_progress_display_accuracy(self):
        """Test progress display accuracy."""
        # Test first message
        progress = VerificationUIComponents.update_progress_display(0, 10)
        assert "1 of 10" in progress

        # Test middle message
        progress = VerificationUIComponents.update_progress_display(5, 10)
        assert "6 of 10" in progress

        # Test last message
        progress = VerificationUIComponents.update_progress_display(9, 10)
        assert "10 of 10" in progress

    def test_statistics_display_accuracy_calculation(self):
        """Test statistics display accuracy calculation."""
        # Test with 3 correct out of 5
        correct_str, incorrect_str, accuracy_str = (
            VerificationUIComponents.update_statistics_display(3, 2)
        )
        assert "3" in correct_str
        assert "2" in incorrect_str
        assert "60" in accuracy_str  # 3/5 = 60%

    def test_statistics_display_zero_messages(self):
        """Test statistics display with zero messages."""
        correct_str, incorrect_str, accuracy_str = (
            VerificationUIComponents.update_statistics_display(0, 0)
        )
        assert "0" in correct_str
        assert "0" in incorrect_str
        # Zero messages shows "No verifications yet" message
        assert "0" in accuracy_str or "No verifications" in accuracy_str

    def test_breakdown_by_type_display(self):
        """Test breakdown by type display."""
        # Create sample records
        records = [
            VerificationRecord(
                message_id="1",
                original_message="Test",
                classifier_decision="green",
                classifier_confidence=0.9,
                classifier_indicators=[],
                ground_truth_label="green",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="2",
                original_message="Test",
                classifier_decision="yellow",
                classifier_confidence=0.8,
                classifier_indicators=[],
                ground_truth_label="yellow",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="3",
                original_message="Test",
                classifier_decision="red",
                classifier_confidence=0.95,
                classifier_indicators=[],
                ground_truth_label="red",
                verifier_notes="",
                is_correct=True,
            ),
        ]
        breakdown = VerificationUIComponents.update_breakdown_by_type(records)
        assert "🟢" in breakdown
        assert "🟡" in breakdown
        assert "🔴" in breakdown
        assert "1 correct" in breakdown

    def test_summary_card_rendering(self):
        """Test summary card rendering with real session data."""
        # Create a session with records
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=5,
            message_queue=["1", "2", "3", "4", "5"],
        )
        records = [
            VerificationRecord(
                message_id="1",
                original_message="Test",
                classifier_decision="green",
                classifier_confidence=0.9,
                classifier_indicators=[],
                ground_truth_label="green",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="2",
                original_message="Test",
                classifier_decision="yellow",
                classifier_confidence=0.8,
                classifier_indicators=[],
                ground_truth_label="red",
                verifier_notes="Missed indicators",
                is_correct=False,
            ),
        ]
        session.verifications = records
        session.verified_count = 2
        session.correct_count = 1
        session.incorrect_count = 1

        summary = VerificationUIComponents.render_summary_card(session, records)
        assert "Test Dataset" in summary
        assert "2" in summary  # Total messages reviewed
        assert "1" in summary  # Correct count
        assert "50" in summary  # Accuracy percentage

    def test_csv_export_end_to_end(self):
        """Test CSV export functionality end-to-end."""
        # Create a session with records
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=3,
            message_queue=["1", "2", "3"],
        )
        records = [
            VerificationRecord(
                message_id="1",
                original_message="I'm feeling anxious",
                classifier_decision="yellow",
                classifier_confidence=0.85,
                classifier_indicators=["Anxiety"],
                ground_truth_label="yellow",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="2",
                original_message="I want to end it all",
                classifier_decision="red",
                classifier_confidence=0.95,
                classifier_indicators=["Suicidal ideation"],
                ground_truth_label="red",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="3",
                original_message="I'm fine",
                classifier_decision="green",
                classifier_confidence=0.9,
                classifier_indicators=[],
                ground_truth_label="yellow",
                verifier_notes="False negative",
                is_correct=False,
            ),
        ]
        session.verifications = records
        session.verified_count = 3
        session.correct_count = 2
        session.incorrect_count = 1

        # Generate CSV
        csv_content = VerificationCSVExporter.generate_csv_content(session)
        assert csv_content is not None
        assert len(csv_content) > 0
        assert "Patient Message" in csv_content
        assert "Classifier Said" in csv_content
        assert "You Said" in csv_content
        assert "I'm feeling anxious" in csv_content
        assert "I want to end it all" in csv_content
        assert "I'm fine" in csv_content
        assert "Total Messages" in csv_content
        assert "Accuracy" in csv_content

    def test_csv_filename_generation(self):
        """Test CSV filename generation."""
        filename = VerificationCSVExporter.generate_csv_filename()
        assert filename is not None
        assert "verification_results" in filename
        assert ".csv" in filename
        # Check date format
        today = datetime.now().strftime("%Y-%m-%d")
        assert today in filename

    def test_session_persistence_and_resumption(self):
        """Test session persistence and resumption."""
        store = JSONVerificationStore()

        # Create and save a session
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=5,
            message_queue=["1", "2", "3", "4", "5"],
        )
        record = VerificationRecord(
            message_id="1",
            original_message="Test",
            classifier_decision="green",
            classifier_confidence=0.9,
            classifier_indicators=[],
            ground_truth_label="green",
            verifier_notes="",
            is_correct=True,
        )
        session.verifications.append(record)
        session.verified_count = 1
        session.correct_count = 1

        # Save session
        store.save_session(session)

        # Load session
        loaded_session = store.load_session(session.session_id)
        assert loaded_session is not None
        assert loaded_session.session_id == session.session_id
        assert loaded_session.verified_count == 1
        assert len(loaded_session.verifications) == 1

    def test_completed_session_immutability(self):
        """Test that completed sessions cannot be modified."""
        store = JSONVerificationStore()

        # Create and complete a session
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=1,
            message_queue=["1"],
        )
        session.is_complete = True
        session.completed_at = datetime.now()
        store.save_session(session)

        # Try to load and verify immutability
        loaded_session = store.load_session(session.session_id)
        assert loaded_session.is_complete is True
        # Verify that the session cannot be modified
        assert not store.can_modify_session(loaded_session)

    def test_error_handling_for_missing_feedback(self):
        """Test error handling for missing feedback."""
        from src.core.verification_error_handler import VerificationErrorHandler, ErrorType

        error = VerificationErrorHandler.create_error(
            ErrorType.MISSING_FEEDBACK,
            "Please select if this was correct or incorrect"
        )
        assert error is not None
        assert error.error_type == ErrorType.MISSING_FEEDBACK
        assert "correct or incorrect" in error.user_message

    def test_error_handling_for_missing_correction(self):
        """Test error handling for missing correction."""
        from src.core.verification_error_handler import VerificationErrorHandler, ErrorType

        error = VerificationErrorHandler.create_error(
            ErrorType.MISSING_CORRECTION,
            "Please select a correction before submitting"
        )
        assert error is not None
        assert error.error_type == ErrorType.MISSING_CORRECTION
        assert "classification" in error.user_message or "correction" in error.user_message

    def test_error_handling_for_csv_export_failure(self):
        """Test error handling for CSV export failure."""
        from src.core.verification_error_handler import VerificationErrorHandler, ErrorType

        error = VerificationErrorHandler.create_error(
            ErrorType.CSV_EXPORT_FAILURE,
            "Download failed. Please try again."
        )
        assert error is not None
        assert error.error_type == ErrorType.CSV_EXPORT_FAILURE
        assert "Download" in error.user_message

    def test_all_buttons_have_correct_variants(self):
        """Test that all buttons have correct visual variants."""
        correct_btn, incorrect_btn = VerificationUIComponents.create_feedback_buttons()
        # Buttons should have different variants for visual distinction
        assert correct_btn is not None
        assert incorrect_btn is not None

    def test_dataset_metadata_display_accuracy(self):
        """Test dataset metadata display accuracy."""
        datasets = TestDatasetManager.get_dataset_list()
        dataset = TestDatasetManager.load_dataset(datasets[0]['dataset_id'])

        metadata = VerificationUIComponents.render_dataset_metadata(dataset)
        assert dataset.name in metadata
        assert dataset.description in metadata
        assert str(dataset.message_count) in metadata

    def test_session_info_display_rendering(self):
        """Test session info display rendering."""
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=10,
            message_queue=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
        )
        session.verified_count = 5
        session.correct_count = 4

        info = VerificationUIComponents.render_session_info(session)
        assert "Test Dataset" in info
        assert "Test Verifier" in info
        assert "5/10" in info
        assert "80" in info  # 4/5 = 80%

    def test_verification_workflow_state_transitions(self):
        """Test state transitions in verification workflow."""
        # Create initial session
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=2,
            message_queue=["1", "2"],
        )
        assert session.verified_count == 0
        assert session.is_complete is False

        # Add first verification
        record1 = VerificationRecord(
            message_id="1",
            original_message="Test 1",
            classifier_decision="green",
            classifier_confidence=0.9,
            classifier_indicators=[],
            ground_truth_label="green",
            verifier_notes="",
            is_correct=True,
        )
        session.verifications.append(record1)
        session.verified_count = 1
        session.correct_count = 1
        assert session.verified_count == 1
        assert session.is_complete is False

        # Add second verification
        record2 = VerificationRecord(
            message_id="2",
            original_message="Test 2",
            classifier_decision="yellow",
            classifier_confidence=0.8,
            classifier_indicators=[],
            ground_truth_label="yellow",
            verifier_notes="",
            is_correct=True,
        )
        session.verifications.append(record2)
        session.verified_count = 2
        session.correct_count = 2

        # Mark as complete
        session.is_complete = True
        session.completed_at = datetime.now()
        assert session.verified_count == 2
        assert session.is_complete is True
        assert len(session.verifications) == 2
class TestUIComponentsConsistency:
    """Test consistency of UI components across different states."""

    def test_badge_colors_consistent(self):
        """Test that badge colors are consistent."""
        green_badge = VerificationUIComponents.get_classifier_decision_badge("green")
        yellow_badge = VerificationUIComponents.get_classifier_decision_badge("yellow")
        red_badge = VerificationUIComponents.get_classifier_decision_badge("red")
        assert "🟢" in green_badge
        assert "🟡" in yellow_badge
        assert "🔴" in red_badge

        # Test case insensitivity
        green_badge_upper = VerificationUIComponents.get_classifier_decision_badge("GREEN")
        assert "🟢" in green_badge_upper

    def test_progress_display_format_consistency(self):
        """Test that progress display format is consistent."""
        progress1 = VerificationUIComponents.update_progress_display(0, 5)
        progress2 = VerificationUIComponents.update_progress_display(2, 5)
        progress3 = VerificationUIComponents.update_progress_display(4, 5)

        # All should have the same format
        assert "Progress:" in progress1
        assert "Progress:" in progress2
        assert "Progress:" in progress3
        assert "of" in progress1
        assert "of" in progress2
        assert "of" in progress3

    def test_statistics_display_format_consistency(self):
        """Test that statistics display format is consistent."""
        correct1, incorrect1, accuracy1 = (
            VerificationUIComponents.update_statistics_display(1, 0)
        )
        correct2, incorrect2, accuracy2 = (
            VerificationUIComponents.update_statistics_display(2, 1)
        )

        # All should have consistent format
        assert "Correct:" in correct1
        assert "Correct:" in correct2
        assert "Incorrect:" in incorrect1
        assert "Incorrect:" in incorrect2
        assert "Accuracy:" in accuracy1
        assert "Accuracy:" in accuracy2