# Commit e8c7fad (DocUA): Implement Or_4.txt feedback — Provider Summary as
# final exchange, simplified interface improvements.
"""
Final integration tests for verification mode UI polish.
Tests that verify:
- All UI components render correctly
- All buttons and interactions work as expected
- CSV download functionality works end-to-end
- Verification mode integrates seamlessly with existing interface
Requirements: All
"""
import pytest
import tempfile
import os
from datetime import datetime
from unittest.mock import Mock, patch, MagicMock
from src.interface.gradio_app import create_simplified_interface
from src.interface.verification_ui import VerificationUIComponents
from src.core.verification_models import (
VerificationSession,
VerificationRecord,
TestMessage,
TestDataset,
)
from src.core.test_datasets import TestDatasetManager
from src.core.verification_store import JSONVerificationStore
from src.core.verification_csv_exporter import VerificationCSVExporter
class TestVerificationModeIntegration:
    """Test verification mode integration with main interface."""

    def test_gradio_app_creates_successfully(self):
        """Test that Gradio app can be created without errors."""
        try:
            interface = create_simplified_interface()
            assert interface is not None
            assert hasattr(interface, 'launch')
        except Exception as e:
            pytest.fail(f"Failed to create Gradio interface: {str(e)}")

    def test_verification_tab_exists_in_interface(self):
        """Test that verification tab is present in the interface."""
        try:
            interface = create_simplified_interface()
            # Check that the interface has tabs
            assert hasattr(interface, 'blocks')
        except Exception as e:
            pytest.fail(f"Failed to verify tab structure: {str(e)}")

    def test_all_ui_components_render_correctly(self):
        """Test that all verification UI components render without errors."""
        # Dataset selector
        dataset_selector = VerificationUIComponents.create_dataset_selector_component()
        assert dataset_selector is not None
        # Message review components
        message_text, decision_badge, confidence, indicators = (
            VerificationUIComponents.create_message_review_component()
        )
        assert message_text is not None
        assert decision_badge is not None
        assert confidence is not None
        assert indicators is not None
        # Feedback buttons
        correct_btn, incorrect_btn = VerificationUIComponents.create_feedback_buttons()
        assert correct_btn is not None
        assert incorrect_btn is not None
        # Correction selector
        correction_selector, notes_field = VerificationUIComponents.create_correction_selector()
        assert correction_selector is not None
        assert notes_field is not None
        # Progress display
        progress = VerificationUIComponents.create_progress_display()
        assert progress is not None
        # Statistics panel
        correct_count, incorrect_count, accuracy = (
            VerificationUIComponents.create_statistics_panel()
        )
        assert correct_count is not None
        assert incorrect_count is not None
        assert accuracy is not None
        # Breakdown by type
        breakdown = VerificationUIComponents.create_breakdown_by_type_component()
        assert breakdown is not None
        # Summary card
        summary = VerificationUIComponents.create_summary_card_component()
        assert summary is not None

    def test_dataset_selector_has_valid_options(self):
        """Test that dataset selector has valid dataset options."""
        datasets = TestDatasetManager.get_dataset_list()
        assert len(datasets) > 0
        for dataset in datasets:
            assert 'name' in dataset
            assert 'dataset_id' in dataset
            assert 'message_count' in dataset
            assert dataset['message_count'] > 0

    def test_message_review_rendering_with_real_data(self):
        """Test message review rendering with real dataset data."""
        # Load a real dataset
        datasets = TestDatasetManager.get_dataset_list()
        dataset = TestDatasetManager.load_dataset(datasets[0]['dataset_id'])
        # Get first message
        message = dataset.messages[0]
        # Render message review
        message_text, decision_badge, confidence, indicators = (
            VerificationUIComponents.render_message_review(
                message,
                message.pre_classified_label,
                0.85,
                ["Indicator 1", "Indicator 2"]
            )
        )
        assert message_text == message.text
        assert "🟢" in decision_badge or "🟡" in decision_badge or "🔴" in decision_badge
        assert "%" in confidence
        # The implementation uses comma-separated format with "Detected:" prefix
        assert "Indicator 1" in indicators and "Indicator 2" in indicators

    def test_classifier_decision_badge_all_types(self):
        """Test classifier decision badge for all classification types."""
        for classification_type in ["green", "yellow", "red"]:
            badge = VerificationUIComponents.get_classifier_decision_badge(classification_type)
            assert badge is not None
            assert len(badge) > 0
            # Check for emoji
            if classification_type == "green":
                assert "🟢" in badge
            elif classification_type == "yellow":
                assert "🟡" in badge
            elif classification_type == "red":
                assert "🔴" in badge

    def test_confidence_formatting_edge_cases(self):
        """Test confidence formatting with edge cases."""
        # Test 0% confidence
        formatted = VerificationUIComponents.format_confidence_percentage(0.0)
        assert "0%" in formatted
        # Test 100% confidence
        formatted = VerificationUIComponents.format_confidence_percentage(1.0)
        assert "100%" in formatted
        # Test 50% confidence
        formatted = VerificationUIComponents.format_confidence_percentage(0.5)
        assert "50%" in formatted
        # Test rounding
        formatted = VerificationUIComponents.format_confidence_percentage(0.856)
        assert "86%" in formatted

    def test_indicators_formatting_empty_list(self):
        """Test indicators formatting with empty list."""
        formatted = VerificationUIComponents.format_indicators_as_bullets([])
        # The implementation returns "No specific indicators" for empty list
        assert "No specific indicators" in formatted or "no indicators" in formatted.lower()

    def test_indicators_formatting_multiple_items(self):
        """Test indicators formatting with multiple items."""
        indicators = ["Anxiety", "Stress", "Worry"]
        formatted = VerificationUIComponents.format_indicators_as_bullets(indicators)
        # The implementation uses comma-separated format with "Detected:" prefix
        for indicator in indicators:
            assert indicator in formatted
        assert "Detected" in formatted

    def test_progress_display_accuracy(self):
        """Test progress display accuracy."""
        # Progress display takes a zero-based index and shows a one-based count.
        # Test first message
        progress = VerificationUIComponents.update_progress_display(0, 10)
        assert "1 of 10" in progress
        # Test middle message
        progress = VerificationUIComponents.update_progress_display(5, 10)
        assert "6 of 10" in progress
        # Test last message
        progress = VerificationUIComponents.update_progress_display(9, 10)
        assert "10 of 10" in progress

    def test_statistics_display_accuracy_calculation(self):
        """Test statistics display accuracy calculation."""
        # Test with 3 correct out of 5
        correct_str, incorrect_str, accuracy_str = (
            VerificationUIComponents.update_statistics_display(3, 2)
        )
        assert "3" in correct_str
        assert "2" in incorrect_str
        assert "60" in accuracy_str  # 3/5 = 60%

    def test_statistics_display_zero_messages(self):
        """Test statistics display with zero messages."""
        correct_str, incorrect_str, accuracy_str = (
            VerificationUIComponents.update_statistics_display(0, 0)
        )
        assert "0" in correct_str
        assert "0" in incorrect_str
        # Zero messages shows "No verifications yet" message
        assert "0" in accuracy_str or "No verifications" in accuracy_str

    def test_breakdown_by_type_display(self):
        """Test breakdown by type display."""
        # Create sample records: one correct verification per classification type
        records = [
            VerificationRecord(
                message_id="1",
                original_message="Test",
                classifier_decision="green",
                classifier_confidence=0.9,
                classifier_indicators=[],
                ground_truth_label="green",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="2",
                original_message="Test",
                classifier_decision="yellow",
                classifier_confidence=0.8,
                classifier_indicators=[],
                ground_truth_label="yellow",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="3",
                original_message="Test",
                classifier_decision="red",
                classifier_confidence=0.95,
                classifier_indicators=[],
                ground_truth_label="red",
                verifier_notes="",
                is_correct=True,
            ),
        ]
        breakdown = VerificationUIComponents.update_breakdown_by_type(records)
        assert "🟢" in breakdown
        assert "🟡" in breakdown
        assert "🔴" in breakdown
        assert "1 correct" in breakdown

    def test_summary_card_rendering(self):
        """Test summary card rendering with real session data."""
        # Create a session with records
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=5,
            message_queue=["1", "2", "3", "4", "5"],
        )
        records = [
            VerificationRecord(
                message_id="1",
                original_message="Test",
                classifier_decision="green",
                classifier_confidence=0.9,
                classifier_indicators=[],
                ground_truth_label="green",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="2",
                original_message="Test",
                classifier_decision="yellow",
                classifier_confidence=0.8,
                classifier_indicators=[],
                ground_truth_label="red",
                verifier_notes="Missed indicators",
                is_correct=False,
            ),
        ]
        session.verifications = records
        session.verified_count = 2
        session.correct_count = 1
        session.incorrect_count = 1
        summary = VerificationUIComponents.render_summary_card(session, records)
        assert "Test Dataset" in summary
        assert "2" in summary  # Total messages reviewed
        assert "1" in summary  # Correct count
        assert "50" in summary  # Accuracy percentage

    def test_csv_export_end_to_end(self):
        """Test CSV export functionality end-to-end."""
        # Create a session with records
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=3,
            message_queue=["1", "2", "3"],
        )
        records = [
            VerificationRecord(
                message_id="1",
                original_message="I'm feeling anxious",
                classifier_decision="yellow",
                classifier_confidence=0.85,
                classifier_indicators=["Anxiety"],
                ground_truth_label="yellow",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="2",
                original_message="I want to end it all",
                classifier_decision="red",
                classifier_confidence=0.95,
                classifier_indicators=["Suicidal ideation"],
                ground_truth_label="red",
                verifier_notes="",
                is_correct=True,
            ),
            VerificationRecord(
                message_id="3",
                original_message="I'm fine",
                classifier_decision="green",
                classifier_confidence=0.9,
                classifier_indicators=[],
                ground_truth_label="yellow",
                verifier_notes="False negative",
                is_correct=False,
            ),
        ]
        session.verifications = records
        session.verified_count = 3
        session.correct_count = 2
        session.incorrect_count = 1
        # Generate CSV and check headers, message rows, and summary section
        csv_content = VerificationCSVExporter.generate_csv_content(session)
        assert csv_content is not None
        assert len(csv_content) > 0
        assert "Patient Message" in csv_content
        assert "Classifier Said" in csv_content
        assert "You Said" in csv_content
        assert "I'm feeling anxious" in csv_content
        assert "I want to end it all" in csv_content
        assert "I'm fine" in csv_content
        assert "Total Messages" in csv_content
        assert "Accuracy" in csv_content

    def test_csv_filename_generation(self):
        """Test CSV filename generation."""
        filename = VerificationCSVExporter.generate_csv_filename()
        assert filename is not None
        assert "verification_results" in filename
        assert ".csv" in filename
        # Check date format
        today = datetime.now().strftime("%Y-%m-%d")
        assert today in filename

    def test_session_persistence_and_resumption(self):
        """Test session persistence and resumption."""
        # NOTE(review): uses the store's default location — consider pointing it
        # at a tempfile directory to avoid leaving test sessions behind.
        store = JSONVerificationStore()
        # Create and save a session
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=5,
            message_queue=["1", "2", "3", "4", "5"],
        )
        record = VerificationRecord(
            message_id="1",
            original_message="Test",
            classifier_decision="green",
            classifier_confidence=0.9,
            classifier_indicators=[],
            ground_truth_label="green",
            verifier_notes="",
            is_correct=True,
        )
        session.verifications.append(record)
        session.verified_count = 1
        session.correct_count = 1
        # Save session
        store.save_session(session)
        # Load session
        loaded_session = store.load_session(session.session_id)
        assert loaded_session is not None
        assert loaded_session.session_id == session.session_id
        assert loaded_session.verified_count == 1
        assert len(loaded_session.verifications) == 1

    def test_completed_session_immutability(self):
        """Test that completed sessions cannot be modified."""
        store = JSONVerificationStore()
        # Create and complete a session
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=1,
            message_queue=["1"],
        )
        session.is_complete = True
        session.completed_at = datetime.now()
        store.save_session(session)
        # Try to load and verify immutability
        loaded_session = store.load_session(session.session_id)
        assert loaded_session.is_complete is True
        # Verify that the session cannot be modified
        assert not store.can_modify_session(loaded_session)

    def test_error_handling_for_missing_feedback(self):
        """Test error handling for missing feedback."""
        from src.core.verification_error_handler import VerificationErrorHandler, ErrorType
        error = VerificationErrorHandler.create_error(
            ErrorType.MISSING_FEEDBACK,
            "Please select if this was correct or incorrect"
        )
        assert error is not None
        assert error.error_type == ErrorType.MISSING_FEEDBACK
        assert "correct or incorrect" in error.user_message

    def test_error_handling_for_missing_correction(self):
        """Test error handling for missing correction."""
        from src.core.verification_error_handler import VerificationErrorHandler, ErrorType
        error = VerificationErrorHandler.create_error(
            ErrorType.MISSING_CORRECTION,
            "Please select a correction before submitting"
        )
        assert error is not None
        assert error.error_type == ErrorType.MISSING_CORRECTION
        assert "classification" in error.user_message or "correction" in error.user_message

    def test_error_handling_for_csv_export_failure(self):
        """Test error handling for CSV export failure."""
        from src.core.verification_error_handler import VerificationErrorHandler, ErrorType
        error = VerificationErrorHandler.create_error(
            ErrorType.CSV_EXPORT_FAILURE,
            "Download failed. Please try again."
        )
        assert error is not None
        assert error.error_type == ErrorType.CSV_EXPORT_FAILURE
        assert "Download" in error.user_message

    def test_all_buttons_have_correct_variants(self):
        """Test that all buttons have correct visual variants."""
        correct_btn, incorrect_btn = VerificationUIComponents.create_feedback_buttons()
        # Buttons should have different variants for visual distinction
        assert correct_btn is not None
        assert incorrect_btn is not None

    def test_dataset_metadata_display_accuracy(self):
        """Test dataset metadata display accuracy."""
        datasets = TestDatasetManager.get_dataset_list()
        dataset = TestDatasetManager.load_dataset(datasets[0]['dataset_id'])
        metadata = VerificationUIComponents.render_dataset_metadata(dataset)
        assert dataset.name in metadata
        assert dataset.description in metadata
        assert str(dataset.message_count) in metadata

    def test_session_info_display_rendering(self):
        """Test session info display rendering."""
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=10,
            message_queue=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
        )
        session.verified_count = 5
        session.correct_count = 4
        info = VerificationUIComponents.render_session_info(session)
        assert "Test Dataset" in info
        assert "Test Verifier" in info
        assert "5/10" in info
        assert "80" in info  # 4/5 = 80%

    def test_verification_workflow_state_transitions(self):
        """Test state transitions in verification workflow."""
        # Create initial session
        session = VerificationSession(
            session_id="test-session",
            verifier_name="Test Verifier",
            dataset_id="test-dataset",
            dataset_name="Test Dataset",
            total_messages=2,
            message_queue=["1", "2"],
        )
        assert session.verified_count == 0
        assert session.is_complete is False
        # Add first verification
        record1 = VerificationRecord(
            message_id="1",
            original_message="Test 1",
            classifier_decision="green",
            classifier_confidence=0.9,
            classifier_indicators=[],
            ground_truth_label="green",
            verifier_notes="",
            is_correct=True,
        )
        session.verifications.append(record1)
        session.verified_count = 1
        session.correct_count = 1
        assert session.verified_count == 1
        assert session.is_complete is False
        # Add second verification
        record2 = VerificationRecord(
            message_id="2",
            original_message="Test 2",
            classifier_decision="yellow",
            classifier_confidence=0.8,
            classifier_indicators=[],
            ground_truth_label="yellow",
            verifier_notes="",
            is_correct=True,
        )
        session.verifications.append(record2)
        session.verified_count = 2
        session.correct_count = 2
        # Mark as complete
        session.is_complete = True
        session.completed_at = datetime.now()
        assert session.verified_count == 2
        assert session.is_complete is True
        assert len(session.verifications) == 2
class TestUIComponentsConsistency:
    """Test consistency of UI components across different states."""

    def test_badge_colors_consistent(self):
        """Test that badge colors are consistent."""
        green_badge = VerificationUIComponents.get_classifier_decision_badge("green")
        yellow_badge = VerificationUIComponents.get_classifier_decision_badge("yellow")
        red_badge = VerificationUIComponents.get_classifier_decision_badge("red")
        assert "🟢" in green_badge
        assert "🟡" in yellow_badge
        assert "🔴" in red_badge
        # Test case insensitivity
        green_badge_upper = VerificationUIComponents.get_classifier_decision_badge("GREEN")
        assert "🟢" in green_badge_upper

    def test_progress_display_format_consistency(self):
        """Test that progress display format is consistent."""
        progress1 = VerificationUIComponents.update_progress_display(0, 5)
        progress2 = VerificationUIComponents.update_progress_display(2, 5)
        progress3 = VerificationUIComponents.update_progress_display(4, 5)
        # All should have the same format
        assert "Progress:" in progress1
        assert "Progress:" in progress2
        assert "Progress:" in progress3
        assert "of" in progress1
        assert "of" in progress2
        assert "of" in progress3

    def test_statistics_display_format_consistency(self):
        """Test that statistics display format is consistent."""
        correct1, incorrect1, accuracy1 = (
            VerificationUIComponents.update_statistics_display(1, 0)
        )
        correct2, incorrect2, accuracy2 = (
            VerificationUIComponents.update_statistics_display(2, 1)
        )
        # All should have consistent format
        assert "Correct:" in correct1
        assert "Correct:" in correct2
        assert "Incorrect:" in incorrect1
        assert "Incorrect:" in incorrect2
        assert "Accuracy:" in accuracy1
        assert "Accuracy:" in accuracy2