# Provenance: commit 713cfc8 — "feat: implement complete message review interface for Standard Verification" (author: DocUA)
# verification_ui.py
"""
Gradio UI components for Verification Mode.
Provides interface components for reviewing classified messages,
collecting verifier feedback, and displaying results.
Requirements: 1.1, 2.1, 2.2, 2.3, 2.4, 2.5, 3.1, 3.3, 3.4, 12.1, 12.2, 12.3, 12.4, 12.5
"""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr

from src.core.verification_models import (
    VerificationRecord,
    VerificationSession,
    TestMessage,
    TestDataset,
)
from src.core.test_datasets import TestDatasetManager
from src.core.verification_metrics import VerificationMetricsCalculator
from src.interface.ui_consistency_components import (
    StandardizedComponents,
    ClassificationDisplay,
    ProgressDisplay,
    ErrorDisplay,
    SessionDisplay,
    HelpDisplay,
)
@dataclass
class UIState:
    """Mutable state container shared across verification UI callbacks.

    Attributes:
        current_session: Active verification session, if any.
        current_dataset: Dataset currently being verified, if any.
        message_queue: Messages awaiting review; defaults to an empty list.
        current_message_index: 0-based index into ``message_queue``.
    """
    current_session: Optional[VerificationSession] = None
    current_dataset: Optional[TestDataset] = None
    # field(default_factory=list) is the idiomatic fix for the mutable-default
    # problem; it also lets the annotation be an honest List instead of a
    # ``List[...] = None`` lie.
    message_queue: List[TestMessage] = field(default_factory=list)
    current_message_index: int = 0

    def __post_init__(self) -> None:
        # Backward compatibility: callers may still pass message_queue=None
        # explicitly; normalize that to an empty list as before.
        if self.message_queue is None:
            self.message_queue = []
class VerificationUIComponents:
    """Manages Gradio UI components for verification mode."""
    # Emoji badge per classification level.
    # NOTE(review): neither table is referenced by any method visible in this
    # file (badge rendering is delegated to ClassificationDisplay); presumably
    # kept for external callers — confirm before removing.
    BADGE_COLORS = {
        "green": "🟒",
        "yellow": "🟑",
        "red": "πŸ”΄",
    }
    # Human-readable label per classification level.
    BADGE_LABELS = {
        "green": "GREEN - No Distress",
        "yellow": "YELLOW - Potential Distress",
        "red": "RED - Severe Distress",
    }
@staticmethod
def format_confidence_percentage(confidence: float) -> str:
"""
Format confidence score as percentage using standardized components.
Args:
confidence: Confidence score (0.0-1.0)
Returns:
Formatted percentage string with consistent styling
"""
return ClassificationDisplay.format_confidence_display(confidence)
@staticmethod
def format_indicators_as_bullets(indicators: List[str]) -> str:
"""
Format indicators using standardized components.
Args:
indicators: List of indicator strings
Returns:
Formatted indicators string with consistent styling
"""
return ClassificationDisplay.format_indicators_display(indicators)
@staticmethod
def get_classifier_decision_badge(decision: str) -> str:
"""
Get classifier decision with colored badge using standardized components.
Args:
decision: Classification decision ("green", "yellow", "red")
Returns:
Formatted badge string with emoji and label
"""
return ClassificationDisplay.format_classification_badge(decision)
@staticmethod
def create_dataset_selector_component() -> gr.Component:
"""
Create dataset selector component.
Returns:
Gradio component for dataset selection
"""
datasets = TestDatasetManager.get_dataset_list()
# Create dataset options with descriptions
dataset_options = [
f"{d['name']} ({d['message_count']} messages)"
for d in datasets
]
return gr.Dropdown(
choices=dataset_options,
label="πŸ“Š Select Dataset to Verify",
info="Choose which test dataset to review",
interactive=True,
)
@staticmethod
def create_dataset_metadata_display() -> gr.Component:
"""
Create dataset metadata display component.
Returns:
Gradio component for displaying dataset metadata
"""
return gr.Markdown(
value="Select a dataset to view details",
label="πŸ“‹ Dataset Details",
)
@staticmethod
def render_dataset_metadata(dataset: TestDataset) -> str:
"""
Render dataset metadata for display.
Args:
dataset: Test dataset to display metadata for
Returns:
Formatted markdown string with dataset metadata
"""
if dataset is None:
return "No dataset selected"
metadata = f"""### {dataset.name}
**Description:** {dataset.description}
**Message Count:** {dataset.message_count} messages
**Dataset ID:** `{dataset.dataset_id}`
"""
return metadata
@staticmethod
def render_dataset_selection_confirmation(dataset: TestDataset) -> str:
"""
Render dataset selection confirmation message.
Args:
dataset: Selected test dataset
Returns:
Formatted confirmation message
"""
if dataset is None:
return "No dataset selected"
confirmation = f"""βœ“ **Dataset Selected**
You have selected: **{dataset.name}**
This dataset contains **{dataset.message_count} messages** to verify.
Click "Start Verification" to begin reviewing messages.
"""
return confirmation
@staticmethod
def create_session_resumption_component() -> Tuple[gr.Component, gr.Component]:
"""
Create session resumption components using standardized components.
Returns:
Tuple of (resume_button, new_session_button) components
"""
resume_btn = StandardizedComponents.create_primary_button("Resume Previous Session", "▢️", "lg")
resume_btn.scale = 1
new_session_btn = StandardizedComponents.create_secondary_button("Start New Session", "✨", "lg")
new_session_btn.scale = 1
return resume_btn, new_session_btn
@staticmethod
def create_message_review_component() -> Tuple[gr.Component, gr.Component, gr.Component, gr.Component]:
"""
Create message review component with all required elements.
Returns:
Tuple of (message_text, decision_badge, confidence, indicators) components
"""
message_text = gr.Textbox(
label="πŸ“ Patient Message",
interactive=False,
lines=4,
max_lines=6,
)
decision_badge = gr.Markdown(
value="πŸ”„ Loading...",
label="🎯 Classifier Decision",
)
confidence = gr.Markdown(
value="Loading...",
label="πŸ“Š Confidence Level",
)
indicators = gr.Markdown(
value="Loading...",
label="πŸ” Detected Indicators",
)
return message_text, decision_badge, confidence, indicators
@staticmethod
def create_feedback_buttons() -> Tuple[gr.Component, gr.Component]:
"""
Create feedback buttons for correct/incorrect using standardized components.
Returns:
Tuple of (correct_button, incorrect_button) components
"""
correct_btn = StandardizedComponents.create_primary_button("Correct", "βœ“", "lg")
correct_btn.scale = 1
incorrect_btn = StandardizedComponents.create_stop_button("Incorrect", "βœ—", "lg")
incorrect_btn.scale = 1
return correct_btn, incorrect_btn
@staticmethod
def create_correction_selector() -> Tuple[gr.Component, gr.Component]:
"""
Create correction selector for incorrect classifications using standardized components.
Returns:
Tuple of (correction_selector, notes_field) components
"""
correction_selector = ClassificationDisplay.create_classification_radio()
notes_field = gr.Textbox(
label="πŸ“ Optional Notes (Why is this incorrect?)",
placeholder="e.g., 'Missed anxiety indicators', 'False positive'",
lines=2,
interactive=True,
)
return correction_selector, notes_field
@staticmethod
def create_progress_display() -> gr.Component:
"""
Create progress display component.
Returns:
Gradio component for progress display
"""
return gr.Markdown(
value="πŸ“Š Progress: 0 of 0 messages reviewed",
label="Progress",
)
@staticmethod
def create_statistics_panel() -> Tuple[gr.Component, gr.Component, gr.Component]:
"""
Create statistics display panel.
Returns:
Tuple of (correct_count, incorrect_count, accuracy) components
"""
correct_count = gr.Markdown(
value="βœ“ Correct: 0",
label="Correct Classifications",
)
incorrect_count = gr.Markdown(
value="βœ— Incorrect: 0",
label="Incorrect Classifications",
)
accuracy = gr.Markdown(
value="πŸ“Š Accuracy: 0%",
label="Overall Accuracy",
)
return correct_count, incorrect_count, accuracy
@staticmethod
def render_message_review(
message: TestMessage,
classifier_decision: str,
classifier_confidence: float,
classifier_indicators: List[str],
) -> Tuple[str, str, str, str]:
"""
Render message review with all components.
Args:
message: Test message to display
classifier_decision: Classifier's decision
classifier_confidence: Classifier's confidence
classifier_indicators: List of detected indicators
Returns:
Tuple of (message_text, decision_badge, confidence, indicators)
"""
message_text = message.text
decision_badge = VerificationUIComponents.get_classifier_decision_badge(
classifier_decision
)
confidence_str = VerificationUIComponents.format_confidence_percentage(
classifier_confidence
)
indicators_str = VerificationUIComponents.format_indicators_as_bullets(
classifier_indicators
)
return message_text, decision_badge, confidence_str, indicators_str
@staticmethod
def update_progress_display(
current_index: int,
total_messages: int,
) -> str:
"""
Update progress display using standardized components.
Args:
current_index: Current message index (0-based)
total_messages: Total messages in dataset
Returns:
Formatted progress string
"""
message_number = current_index + 1
return ProgressDisplay.format_progress_display(message_number, total_messages)
@staticmethod
def update_statistics_display(
correct_count: int,
incorrect_count: int,
) -> Tuple[str, str, str]:
"""
Update statistics display using standardized components.
Args:
correct_count: Number of correct classifications
incorrect_count: Number of incorrect classifications
Returns:
Tuple of (correct_str, incorrect_str, accuracy_str)
"""
total = correct_count + incorrect_count
correct_str = f"βœ“ **Correct:** {correct_count}"
incorrect_str = f"βœ— **Incorrect:** {incorrect_count}"
accuracy_str = ProgressDisplay.format_accuracy_display(correct_count, total)
return correct_str, incorrect_str, accuracy_str
@staticmethod
def create_breakdown_by_type_component() -> gr.Component:
"""
Create breakdown by classification type component.
Returns:
Gradio component for displaying breakdown by type
"""
return gr.Markdown(
value="🟒 GREEN: 0 correct | 🟑 YELLOW: 0 correct | πŸ”΄ RED: 0 correct",
label="Breakdown by Classification Type",
)
@staticmethod
def update_breakdown_by_type(
records: List[VerificationRecord],
) -> str:
"""
Update breakdown by classification type.
Args:
records: List of verification records
Returns:
Formatted breakdown string
"""
breakdown = {}
for classification_type in ["green", "yellow", "red"]:
type_records = [
r for r in records
if r.classifier_decision == classification_type
]
correct_count = sum(1 for r in type_records if r.is_correct)
breakdown[classification_type] = correct_count
return (
f"🟒 GREEN: {breakdown['green']} correct | "
f"🟑 YELLOW: {breakdown['yellow']} correct | "
f"πŸ”΄ RED: {breakdown['red']} correct"
)
@staticmethod
def create_summary_card_component() -> gr.Component:
"""
Create summary card component for session completion.
Returns:
Gradio component for displaying summary card
"""
return gr.Markdown(
value="## Session Summary\n\nNo session data yet.",
label="Session Summary",
)
@staticmethod
def render_summary_card(
session: VerificationSession,
records: List[VerificationRecord],
) -> str:
"""
Render summary card for session completion.
Args:
session: Verification session
records: List of verification records
Returns:
Formatted summary card markdown
"""
if not records:
return "## Session Summary\n\nNo messages verified yet."
total = len(records)
correct_count = sum(1 for r in records if r.is_correct)
incorrect_count = total - correct_count
accuracy = (correct_count / total) * 100 if total > 0 else 0
# Get breakdown by type
breakdown = {}
for classification_type in ["green", "yellow", "red"]:
type_records = [
r for r in records
if r.classifier_decision == classification_type
]
correct_count_type = sum(1 for r in type_records if r.is_correct)
breakdown[classification_type] = correct_count_type
summary = f"""## Session Summary
**Dataset:** {session.dataset_name}
**Overall Results:**
- Total Messages Reviewed: {total}
- Correct Classifications: {correct_count}
- Incorrect Classifications: {incorrect_count}
- Overall Accuracy: {accuracy:.1f}%
**Breakdown by Classification Type:**
- 🟒 GREEN: {breakdown['green']} correct
- 🟑 YELLOW: {breakdown['yellow']} correct
- πŸ”΄ RED: {breakdown['red']} correct
**Session Status:** {'βœ“ Complete' if session.is_complete else '⏳ In Progress'}
"""
return summary
@staticmethod
def create_session_info_display() -> gr.Component:
"""
Create session info display component.
Returns:
Gradio component for displaying session information
"""
return gr.Markdown(
value="No active session",
label="Session Info",
)
@staticmethod
def render_session_info(session: VerificationSession) -> str:
"""
Render session information display using standardized components.
Args:
session: Verification session
Returns:
Formatted session info markdown
"""
if session is None:
return "No active session"
session_data = {
'verifier_name': session.verifier_name,
'mode_type': getattr(session, 'mode_type', 'standard'),
'dataset_name': session.dataset_name,
'verified_count': session.verified_count,
'total_messages': session.total_messages,
'is_complete': session.is_complete,
'accuracy': (session.correct_count / session.verified_count * 100) if session.verified_count > 0 else 0,
'created_at': session.created_at
}
return SessionDisplay.format_session_info(session_data)