#!/usr/bin/env python3
"""
Conversation Verification UI Components.

Gradio-based interface for reviewing and verifying AI classifier decisions
on patient conversations.
"""

import os
import traceback
from datetime import datetime
from html import escape
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

import gradio as gr

from src.core.conversation_verification import (
    VerificationSession,
    VerificationRecord,
    VerificationFeedback,
    ConversationVerificationManager,
)

# NOTE(review): in the reviewed copy of this module the HTML tags inside the
# string literals were missing (only the visible text survived). The markup
# below is a minimal reconstruction around that text; compare it against the
# intended styling before shipping.


def _esc(value: Any) -> str:
    """Return ``str(value)`` escaped for safe interpolation into HTML."""
    return escape(str(value))


def _notice(title: str, *lines: str) -> str:
    """Build a small titled HTML notice.

    Callers are responsible for escaping any dynamic text they pass in.
    """
    body = "".join(f"<p>{line}</p>" for line in lines)
    return f"<div><h3>{title}</h3>{body}</div>"


class VerificationInterface:
    """Gradio-based UI for conversation verification.

    The session under review and the index of the record currently on screen
    are kept on the instance (``current_session`` / ``current_record_index``)
    so the button callbacks do not need Gradio state inputs.
    """

    def __init__(self, manager: ConversationVerificationManager):
        """Initialize verification interface.

        Args:
            manager: Used to submit verification feedback and reload sessions.
        """
        self.manager = manager
        self.current_session: Optional[VerificationSession] = None
        self.current_record_index: int = 0

    def create_verification_window(self, session: VerificationSession) -> gr.Blocks:
        """
        Create main verification window interface.

        Args:
            session: VerificationSession to review

        Returns:
            Gradio Blocks interface
        """
        self.current_session = session
        self.current_record_index = 0

        # Pre-compute initial values
        records = session.verification_records
        if records:
            initial_exchange_html = self._render_exchange_review(records[0])
            initial_position_html = f"Exchange 1 of {len(records)}"
        else:
            initial_exchange_html = "No exchanges to verify"
            initial_position_html = "No exchanges"
        initial_stats_html = self._render_statistics(session)
        initial_progress_html = self._render_progress_bar(session)
        initial_nav_html = self._render_navigation_info(session)

        # Built without leading indentation so Markdown never treats the
        # header as an indented code block.
        header_markdown = "\n".join([
            "# 🔍 Conversation Verification",
            "",
            f"**Patient:** {session.patient_name}",
            f"**Verifier:** {session.verifier_name}",
            f"**Session:** `{session.session_id}`",
            f"**Started:** {session.start_time.strftime('%Y-%m-%d %H:%M')}",
        ])

        def keep_blank(values: Iterable[Any]) -> Tuple[Any, ...]:
            # Handlers use "" as a placeholder on failure paths; turn it into
            # a no-op update so a failed action does not wipe a panel.
            return tuple(gr.update() if value == "" else value for value in values)

        # Define event handlers before creating the interface
        def handle_correct_click():
            return keep_blank(self._handle_correct_feedback_simple())

        def handle_submit_correction(correct_classification, correction_reason, notes):
            return keep_blank(
                self._handle_incorrect_feedback_simple(
                    correct_classification, correction_reason, notes
                )
            )

        def show_correction_section():
            return gr.update(visible=True)

        def hide_correction_section():
            return gr.update(visible=False)

        def handle_previous_click():
            return self._navigate_simple(self._navigate_previous)

        def handle_next_click():
            return self._navigate_simple(self._navigate_next)

        def handle_skip_to_errors_click():
            return self._navigate_simple(self._skip_to_next_error)

        def handle_export_click():
            return self._export_results_simple()

        def handle_mark_all_correct():
            return keep_blank(self._mark_all_remaining_correct_simple())

        def handle_mark_green_correct():
            return keep_blank(self._mark_all_green_correct_simple())

        # Create interface without theme for HF Spaces compatibility
        with gr.Blocks(title=f"Verify Conversation - {session.patient_name}") as interface:
            # Session header
            with gr.Row():
                gr.Markdown(header_markdown)

            # Progress section
            with gr.Row():
                progress_bar = gr.HTML(value=initial_progress_html)

            with gr.Row():
                with gr.Column(scale=1):
                    stats_display = gr.HTML(value=initial_stats_html)
                with gr.Column(scale=1):
                    navigation_info = gr.HTML(value=initial_nav_html)

            # Main verification area
            with gr.Row():
                with gr.Column(scale=3):
                    # Current exchange display
                    exchange_display = gr.HTML(
                        value=initial_exchange_html, label="Current Exchange"
                    )

                    # Verification buttons
                    with gr.Row():
                        correct_btn = gr.Button("✅ Correct", variant="primary", scale=1)
                        incorrect_btn = gr.Button("❌ Incorrect", variant="secondary", scale=1)

                    # Correction interface (initially hidden)
                    with gr.Column(visible=False) as correction_section:
                        gr.Markdown("### Select Correct Classification:")
                        correction_radio = gr.Radio(
                            choices=["GREEN", "YELLOW", "RED"],
                            label="Correct Classification",
                            interactive=True,
                        )
                        correction_reason = gr.Dropdown(
                            choices=[
                                "Missed indicators",
                                "False positive",
                                "Context misunderstanding",
                                "Severity misjudgment",
                                "Other",
                            ],
                            label="Correction Reason",
                            interactive=True,
                        )
                        correction_notes = gr.Textbox(
                            label="Additional Notes (Optional)",
                            placeholder="Explain the correction...",
                            lines=3,
                            interactive=True,
                        )
                        with gr.Row():
                            submit_correction_btn = gr.Button(
                                "✅ Submit Correction", variant="primary"
                            )
                            cancel_correction_btn = gr.Button(
                                "❌ Cancel", variant="secondary"
                            )

                with gr.Column(scale=1):
                    # Navigation controls
                    with gr.Column():
                        gr.Markdown("### Navigation")
                        with gr.Row():
                            prev_btn = gr.Button("⬅️ Previous", scale=1)
                            next_btn = gr.Button("Next ➡️", scale=1)
                        current_position = gr.HTML(value=initial_position_html)

                        # Quick actions
                        gr.Markdown("### Quick Actions")
                        mark_all_correct_btn = gr.Button(
                            "✅ Mark All Remaining as Correct", size="sm"
                        )
                        with gr.Row():
                            mark_green_correct_btn = gr.Button(
                                "🟢 All GREEN Correct", size="sm", scale=1
                            )
                            skip_to_errors_btn = gr.Button(
                                "🔍 Skip to Errors", size="sm", scale=1
                            )

                        # Export section
                        gr.Markdown("### Export Results")
                        export_btn = gr.Button("📊 Export to CSV", variant="primary")
                        export_status = gr.HTML(value="")

            # Wire up event handlers
            feedback_outputs = [exchange_display, current_position, stats_display, progress_bar]
            navigation_outputs = [exchange_display, current_position]

            correct_btn.click(fn=handle_correct_click, outputs=feedback_outputs)
            incorrect_btn.click(fn=show_correction_section, outputs=[correction_section])
            cancel_correction_btn.click(fn=hide_correction_section, outputs=[correction_section])
            submit_correction_btn.click(
                fn=handle_submit_correction,
                inputs=[correction_radio, correction_reason, correction_notes],
                outputs=feedback_outputs + [correction_section],
            )
            prev_btn.click(fn=handle_previous_click, outputs=navigation_outputs)
            next_btn.click(fn=handle_next_click, outputs=navigation_outputs)
            skip_to_errors_btn.click(fn=handle_skip_to_errors_click, outputs=navigation_outputs)
            export_btn.click(fn=handle_export_click, outputs=[export_status])
            mark_all_correct_btn.click(fn=handle_mark_all_correct, outputs=feedback_outputs)
            mark_green_correct_btn.click(fn=handle_mark_green_correct, outputs=feedback_outputs)

        return interface

    # ------------------------------------------------------------------
    # Shared private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _position_label(session: VerificationSession, index: int) -> str:
        """Return the "Exchange i of n" label for a zero-based ``index``."""
        return f"Exchange {index + 1} of {len(session.verification_records)}"

    @staticmethod
    def _count_verified(session: VerificationSession) -> int:
        """Count records that already carry a verdict (``is_correct`` set)."""
        return sum(1 for r in session.verification_records if r.is_correct is not None)

    def _submit_correct(self, session_id: Any, record: VerificationRecord) -> bool:
        """Submit an "is correct" verdict for ``record``; return the manager's result."""
        feedback = VerificationFeedback(exchange_id=record.exchange_id, is_correct=True)
        return self.manager.submit_exchange_verification(
            session_id, record.exchange_id, feedback
        )

    def _mark_records_correct(self, session_id: Any, records: Iterable[VerificationRecord]) -> int:
        """Submit "is correct" for each record; return how many submissions succeeded."""
        success_count = 0
        for record in records:
            if self._submit_correct(session_id, record):
                success_count += 1
        return success_count

    def _show_next_unverified(
        self,
        session: VerificationSession,
        index: int,
        completion_html: str,
        completion_position: str = "Verification Complete",
    ) -> Tuple[str, str, str, str, Optional[int]]:
        """Render the next unverified record after ``index``.

        Returns:
            ``(exchange_html, position_html, stats_html, progress_html,
            next_index)``. ``next_index`` is ``None`` when nothing is left to
            verify, in which case the completion texts are returned instead of
            a rendered record.
        """
        next_index = self._find_next_unverified_index(session, index)
        if next_index is None:
            exchange_html, position_html = completion_html, completion_position
        else:
            exchange_html = self._render_exchange_review(
                session.verification_records[next_index]
            )
            position_html = self._position_label(session, next_index)
        stats_html = self._render_statistics(session)
        progress_html = self._render_progress_bar(session)
        return exchange_html, position_html, stats_html, progress_html, next_index

    def _track_position(self, new_index: int) -> None:
        """Remember which record is on screen after a feedback action.

        Once every record is verified the index is parked past the end of the
        list, so a stray click on "Correct" hits the "No more exchanges" guard
        instead of silently overwriting an existing verdict. Previous/Next
        clamp back into range from the parked position.
        """
        session = self.current_session
        if session is not None and self._find_next_unverified_index(session, -1) is None:
            new_index = len(session.verification_records)
        self.current_record_index = new_index

    def _export_to_csv(self, session: VerificationSession) -> Tuple[str, float]:
        """Run the CSV exporter and return ``(csv_path, size_in_kb)``."""
        # Imported lazily, as in the original code.
        from src.core.verification_exporter import VerificationExporter

        csv_path = VerificationExporter().export_session_to_csv(session)
        size_bytes = os.path.getsize(csv_path) if os.path.exists(csv_path) else 0
        return csv_path, size_bytes / 1024

    # ------------------------------------------------------------------
    # Rendering
    # ------------------------------------------------------------------

    def _load_initial_exchange(self, session: VerificationSession, index: int) -> Tuple[str, str, str, str]:
        """Render the exchange at ``index`` together with stats and progress.

        Returns:
            ``(exchange_html, position_html, stats_html, progress_html)``;
            placeholder texts when the session has no records.
        """
        if not session.verification_records:
            return "No exchanges to verify", "No exchanges", "", ""

        record = session.verification_records[index]
        exchange_html = self._render_exchange_review(record)
        position_html = self._position_label(session, index)
        stats_html = self._render_statistics(session)
        progress_html = self._render_progress_bar(session)
        return exchange_html, position_html, stats_html, progress_html

    def _render_exchange_review(self, record: VerificationRecord) -> str:
        """Render one exchange (message, response, classification, verdict) as HTML.

        All record text is HTML-escaped: patient and model text is untrusted
        and is displayed through ``gr.HTML``.
        """
        # Classification indicator
        indicator_emoji = {"GREEN": "🟢", "YELLOW": "🟡", "RED": "🔴"}
        emoji = indicator_emoji.get(record.original_classification, "⚪")

        # Verification status (empty while the record is still unverified)
        status_html = ""
        if record.is_correct is not None:
            if record.is_correct:
                status_html = "<div><strong>✅ Verified as Correct</strong></div>"
            else:
                notes_html = (
                    f"<br>Notes: {_esc(record.verifier_notes)}"
                    if record.verifier_notes
                    else ""
                )
                status_html = (
                    "<div><strong>❌ Marked as Incorrect</strong><br>"
                    f"Correct Classification: {_esc(record.correct_classification)}<br>"
                    f"Reason: {_esc(record.correction_reason)}"
                    f"{notes_html}</div>"
                )

        # Indicators display: first three, then a "+N more" suffix
        indicators_html = ""
        indicators = record.original_indicators
        if indicators:
            indicators_list = ", ".join(_esc(item) for item in indicators[:3])
            if len(indicators) > 3:
                indicators_list += f" +{len(indicators) - 3} more"
            indicators_html = f"<br><small>Indicators: {indicators_list}</small>"

        # round() rather than int(): int(0.29 * 100) truncates to 28.
        confidence_pct = round(record.original_confidence * 100)

        return f"""
<div>
  {status_html}
  <div><strong>👤 Patient Message:</strong><br>"{_esc(record.user_message)}"</div>
  <div><strong>🤖 AI Response:</strong><br>{_esc(record.assistant_response)}</div>
  <div><strong>🔍 AI Classification:</strong><br>
    {emoji} {_esc(record.original_classification)} ({confidence_pct}%) {indicators_html}<br>
    <em>Reasoning:</em> {_esc(record.original_reasoning)}
  </div>
</div>
"""

    def _render_progress_bar(self, session: VerificationSession) -> str:
        """Render progress bar."""
        progress = session.get_progress()
        percentage = progress.calculate_progress_percentage()
        # Clamp only the CSS width; the label shows the reported value as-is.
        bar_width = min(max(percentage, 0.0), 100.0)
        return f"""
<div>
  <div><strong>Progress:</strong> {progress.verified_exchanges} of {progress.total_exchanges} verified <span>{percentage:.1f}%</span></div>
  <div style="background:#e5e7eb;border-radius:4px;height:8px;">
    <div style="background:#10b981;border-radius:4px;height:8px;width:{bar_width:.1f}%;"></div>
  </div>
</div>
"""

    def _render_statistics(self, session: VerificationSession) -> str:
        """Render verification statistics."""
        progress = session.get_progress()
        if progress.verified_exchanges == 0:
            return "<div><h4>📊 Statistics</h4><p>No verifications completed yet.</p></div>"

        # NOTE(review): the reviewed copy shows only the headings for
        # "By Classification" and "Common Errors", with no per-item rows, so
        # none are emitted here.
        parts = [
            "<div>",
            "<h4>📊 Statistics</h4>",
            f"<p>Overall Accuracy: {progress.accuracy_overall:.1%}</p>",
            "<p>By Classification:</p>",
        ]
        if progress.common_errors:
            parts.append("<p>Common Errors:</p>")
        parts.append("</div>")
        return "".join(parts)

    def _render_navigation_info(self, session: VerificationSession) -> str:
        """Render navigation information."""
        unverified_count = len(session.get_unverified_records())
        return f"""
<div>
  <h4>🧭 Navigation</h4>
  <p>Total Exchanges: {session.total_exchanges}</p>
  <p>Remaining: {unverified_count}</p>
  <p>Status: {'Complete' if session.is_complete else 'In Progress'}</p>
</div>
"""

    # ------------------------------------------------------------------
    # Session-based handlers (explicit session and index arguments)
    # ------------------------------------------------------------------

    def _handle_correct_feedback(self, session: VerificationSession, index: int) -> Tuple[str, str, str, str, int]:
        """Handle correct classification feedback.

        Returns:
            ``(exchange_html, position_html, stats_html, progress_html,
            index)`` where ``index`` is the record now displayed, or the input
            index when nothing changed or everything is verified.
        """
        try:
            if index >= len(session.verification_records):
                return "No more exchanges", self._position_label(session, index), "", "", index

            record = session.verification_records[index]
            if not self._submit_correct(session.session_id, record):
                return "❌ Failed to submit feedback", self._position_label(session, index), "", "", index

            # Reload session to get updated data
            updated_session = self.manager.load_session(session.session_id)
            if not updated_session:
                return "❌ Failed to reload session", self._position_label(session, index), "", "", index
            self.current_session = updated_session

            # Move to next unverified exchange
            *panels, next_index = self._show_next_unverified(
                updated_session,
                index,
                _notice(
                    "🎉 All exchanges verified!",
                    "Great job! You can now export the results using the Export button below.",
                ),
            )
            return (*panels, index if next_index is None else next_index)
        except Exception as e:
            print(f"Error in _handle_correct_feedback: {e}")
            traceback.print_exc()
            return f"❌ Error: {_esc(e)}", f"Exchange {index + 1}", "", "", index

    def _handle_incorrect_feedback(
        self,
        session: VerificationSession,
        index: int,
        correct_classification: str,
        correction_reason: str,
        notes: str,
    ) -> Tuple[str, str, str, str, int, gr.Column]:
        """Handle incorrect classification feedback.

        Returns:
            The same five values as ``_handle_correct_feedback`` plus the
            correction-section update: hidden after a successful submission,
            left visible when validation or submission fails.
        """
        try:
            if index >= len(session.verification_records):
                return (
                    "No more exchanges",
                    self._position_label(session, index),
                    "",
                    "",
                    index,
                    gr.Column(visible=False),
                )

            # Validate inputs
            if not correct_classification:
                return "❌ Please select correct classification", f"Exchange {index + 1}", "", "", index, gr.Column(visible=True)
            if not correction_reason:
                return "❌ Please select correction reason", f"Exchange {index + 1}", "", "", index, gr.Column(visible=True)

            record = session.verification_records[index]
            stripped_notes = notes.strip() if notes else ""
            feedback = VerificationFeedback(
                exchange_id=record.exchange_id,
                is_correct=False,
                correct_classification=correct_classification,
                correction_reason=correction_reason,
                notes=stripped_notes or None,
            )
            success = self.manager.submit_exchange_verification(
                session.session_id, record.exchange_id, feedback
            )
            if not success:
                return "❌ Failed to submit correction", f"Exchange {index + 1}", "", "", index, gr.Column(visible=True)

            # Reload session
            updated_session = self.manager.load_session(session.session_id)
            if not updated_session:
                return "❌ Failed to reload session", f"Exchange {index + 1}", "", "", index, gr.Column(visible=True)
            self.current_session = updated_session

            # Move to next unverified exchange
            *panels, next_index = self._show_next_unverified(
                updated_session,
                index,
                _notice(
                    "🎉 All exchanges verified!",
                    "Great job! You can now export the results using the Export button below.",
                ),
            )
            return (*panels, index if next_index is None else next_index, gr.Column(visible=False))
        except Exception as e:
            print(f"Error in _handle_incorrect_feedback: {e}")
            traceback.print_exc()
            return f"❌ Error: {_esc(e)}", f"Exchange {index + 1}", "", "", index, gr.Column(visible=True)

    def _navigate_previous(self, session: VerificationSession, index: int) -> Tuple[str, str, int]:
        """Navigate to previous exchange.

        The target is clamped into the valid range, so an out-of-range or
        parked ``index`` cannot raise; an empty session yields placeholders.
        """
        total = len(session.verification_records)
        if total == 0:
            return "No exchanges to verify", "No exchanges", 0
        new_index = min(max(0, index - 1), total - 1)
        record = session.verification_records[new_index]
        return self._render_exchange_review(record), self._position_label(session, new_index), new_index

    def _navigate_next(self, session: VerificationSession, index: int) -> Tuple[str, str, int]:
        """Navigate to next exchange.

        The target is clamped into the valid range, so an out-of-range or
        parked ``index`` cannot raise; an empty session yields placeholders.
        """
        total = len(session.verification_records)
        if total == 0:
            return "No exchanges to verify", "No exchanges", 0
        new_index = max(0, min(total - 1, index + 1))
        record = session.verification_records[new_index]
        return self._render_exchange_review(record), self._position_label(session, new_index), new_index

    def _mark_all_remaining_correct(self, session: VerificationSession, current_index: int) -> Tuple[str, str, str, str]:
        """Mark all remaining unverified exchanges as correct."""
        try:
            unverified_records = session.get_unverified_records()
            if not unverified_records:
                return (
                    _notice("⚠️ No unverified exchanges", "All exchanges have already been verified."),
                    "All Verified",
                    "",
                    "",
                )

            print(f"🔄 Marking {len(unverified_records)} remaining exchanges as correct...")
            success_count = self._mark_records_correct(session.session_id, unverified_records)

            # Reload session
            updated_session = self.manager.load_session(session.session_id)
            if not updated_session:
                return "❌ Failed to reload session", "Error", "", ""
            self.current_session = updated_session

            exchange_html = _notice(
                "🎉 Batch verification complete!",
                f"Marked {success_count} exchanges as correct.",
                "You can now export the results using the Export button below.",
            )
            return (
                exchange_html,
                "Verification Complete",
                self._render_statistics(updated_session),
                self._render_progress_bar(updated_session),
            )
        except Exception as e:
            print(f"Error in batch verification: {e}")
            traceback.print_exc()
            return f"❌ Batch verification error: {_esc(e)}", "Error", "", ""

    def _export_results(self, session: VerificationSession) -> str:
        """Export verification results to CSV and return an HTML status message."""
        try:
            # Check if there are any verifications to export
            verified_count = self._count_verified(session)
            if verified_count == 0:
                return _notice(
                    "⚠️ Nothing to Export",
                    "Please verify some exchanges first before exporting.",
                )

            csv_path, file_size_kb = self._export_to_csv(session)
            return _notice(
                "✅ Export Successful!",
                f"File: {_esc(os.path.basename(csv_path))}",
                f"Size: {file_size_kb:.1f} KB",
                f"Records: {verified_count} verified exchanges",
                "📁 Saved to: verification_exports/",
            )
        except Exception as e:
            print(f"Export error: {e}")
            traceback.print_exc()
            return _notice("❌ Export Failed", f"Error: {_esc(e)}", "Check console for details")

    def _find_next_unverified_index(self, session: VerificationSession, current_index: int) -> Optional[int]:
        """Find the next unverified exchange index.

        Searches forward from just after ``current_index`` and then wraps to
        the records before it; ``current_index`` itself is never returned.
        Pass ``-1`` to search the whole list from the start.
        """
        records = session.verification_records
        for i in range(current_index + 1, len(records)):
            if records[i].is_correct is None:
                return i
        # If no unverified found after current, check from beginning
        for i in range(current_index):
            if records[i].is_correct is None:
                return i
        return None  # All verified

    def _mark_all_green_correct(self, session: VerificationSession, current_index: int = 0) -> Tuple[str, str, str, str, int]:
        """Mark all unverified GREEN classifications as correct.

        Args:
            session: Session whose GREEN records are verified.
            current_index: Index returned when no record ends up displayed
                (nothing to do, failure, or everything verified). Defaults to
                0, which is what this method returned before the parameter
                existed.

        Returns:
            ``(exchange_html, position_html, stats_html, progress_html,
            index)``.
        """
        try:
            green_records = [
                r
                for r in session.verification_records
                if r.original_classification == 'GREEN' and r.is_correct is None
            ]
            if not green_records:
                return (
                    _notice(
                        "⚠️ No unverified GREEN exchanges",
                        "All GREEN classifications have already been verified.",
                    ),
                    "No GREEN to verify",
                    "",
                    "",
                    current_index,
                )

            print(f"🟢 Marking {len(green_records)} GREEN exchanges as correct...")
            success_count = self._mark_records_correct(session.session_id, green_records)

            # Reload session
            updated_session = self.manager.load_session(session.session_id)
            if not updated_session:
                return "❌ Failed to reload session", "Error", "", "", current_index
            self.current_session = updated_session

            # Find next unverified exchange, searching from the start
            *panels, next_index = self._show_next_unverified(
                updated_session,
                -1,
                _notice(
                    "🎉 GREEN batch verification complete!",
                    f"Marked {success_count} GREEN exchanges as correct.",
                    "Continue with remaining exchanges or export results.",
                ),
                "GREEN Verified",
            )
            return (*panels, current_index if next_index is None else next_index)
        except Exception as e:
            print(f"Error in GREEN batch verification: {e}")
            traceback.print_exc()
            return f"❌ Error: {_esc(e)}", "Error", "", "", current_index

    def _skip_to_next_error(self, session: VerificationSession, current_index: int) -> Tuple[str, str, int]:
        """Skip to next exchange that needs attention (unverified or incorrect)."""
        try:
            total = len(session.verification_records)

            # Look for next unverified exchange
            next_unverified = self._find_next_unverified_index(session, current_index)
            if next_unverified is not None:
                record = session.verification_records[next_unverified]
                position_html = f"Exchange {next_unverified + 1} of {total} (Unverified)"
                return self._render_exchange_review(record), position_html, next_unverified

            # If no unverified, look for incorrect ones
            for i, record in enumerate(session.verification_records):
                if record.is_correct is False:
                    position_html = f"Exchange {i + 1} of {total} (Incorrect)"
                    return self._render_exchange_review(record), position_html, i

            # No errors found
            return (
                _notice("✅ No errors found!", "All exchanges have been verified and are correct."),
                "No Errors",
                current_index,
            )
        except Exception as e:
            print(f"Error in skip to errors: {e}")
            return f"❌ Error: {_esc(e)}", "Error", current_index

    # ------------------------------------------------------------------
    # State-based handlers used by the Gradio callbacks
    # ------------------------------------------------------------------

    def _navigate_simple(
        self,
        navigate: Callable[[VerificationSession, int], Tuple[str, str, int]],
    ) -> Tuple[str, str]:
        """Run a ``(session, index)`` navigation helper against instance state.

        Returns:
            ``(exchange_html, position_html)`` for the navigation outputs.
        """
        session = self.current_session
        if not session or not session.verification_records:
            return "No exchanges to verify", "No exchanges"
        exchange_html, position_html, new_index = navigate(session, self.current_record_index)
        self.current_record_index = new_index
        return exchange_html, position_html

    def _handle_correct_feedback_simple(self) -> Tuple[str, str, str, str]:
        """Simplified correct feedback handler for HF Spaces compatibility."""
        try:
            session = self.current_session
            index = self.current_record_index
            if not session or index >= len(session.verification_records):
                return "No more exchanges", "Complete", "", ""

            record = session.verification_records[index]
            if not self._submit_correct(session.session_id, record):
                return "❌ Failed to submit feedback", f"Exchange {index + 1}", "", ""

            # Reload session
            updated_session = self.manager.load_session(session.session_id)
            if not updated_session:
                return "❌ Failed to reload session", f"Exchange {index + 1}", "", ""
            self.current_session = updated_session

            # Move to next unverified exchange
            *panels, next_index = self._show_next_unverified(
                updated_session,
                index,
                _notice("🎉 All exchanges verified!", "Great job! You can now export the results."),
            )
            self._track_position(index if next_index is None else next_index)
            return tuple(panels)
        except Exception as e:
            print(f"Error in simplified correct feedback: {e}")
            return f"❌ Error: {_esc(e)}", f"Exchange {self.current_record_index + 1}", "", ""

    def _handle_incorrect_feedback_simple(
        self,
        correct_classification: str,
        correction_reason: str,
        notes: str,
    ) -> Tuple[str, str, str, str, Any]:
        """Submit a correction for the record currently on screen.

        Returns:
            The four display panels followed by the correction-section
            visibility update.
        """
        session = self.current_session
        if not session:
            return "❌ No session", "Error", "", "", gr.update(visible=True)
        *panels, new_index, section = self._handle_incorrect_feedback(
            session,
            self.current_record_index,
            correct_classification,
            correction_reason,
            notes,
        )
        self._track_position(new_index)
        return (*panels, section)

    def _mark_all_green_correct_simple(self) -> Tuple[str, str, str, str]:
        """Mark all unverified GREEN records of the current session as correct."""
        session = self.current_session
        if not session:
            return "❌ No session", "Error", "", ""
        *panels, new_index = self._mark_all_green_correct(session, self.current_record_index)
        self._track_position(new_index)
        return tuple(panels)

    def _export_results_simple(self) -> str:
        """Simplified export results for HF Spaces compatibility."""
        try:
            session = self.current_session
            if not session:
                return "❌ No session to export"

            verified_count = self._count_verified(session)
            if verified_count == 0:
                return _notice("⚠️ Nothing to Export", "Please verify some exchanges first.")

            csv_path, file_size_kb = self._export_to_csv(session)
            return _notice(
                "✅ Export Successful!",
                f"File: {_esc(os.path.basename(csv_path))}",
                f"Size: {file_size_kb:.1f} KB",
                f"Records: {verified_count} verified exchanges",
            )
        except Exception as e:
            return _notice("❌ Export Failed", f"Error: {_esc(e)}")

    def _mark_all_remaining_correct_simple(self) -> Tuple[str, str, str, str]:
        """Simplified mark all remaining correct for HF Spaces compatibility."""
        try:
            session = self.current_session
            if not session:
                return "❌ No session", "Error", "", ""

            unverified_records = session.get_unverified_records()
            if not unverified_records:
                return (
                    _notice("⚠️ No unverified exchanges", "All exchanges have already been verified."),
                    "All Verified",
                    "",
                    "",
                )

            success_count = self._mark_records_correct(session.session_id, unverified_records)

            # Reload session
            updated_session = self.manager.load_session(session.session_id)
            if not updated_session:
                return "❌ Failed to reload session", "Error", "", ""
            self.current_session = updated_session
            # Parks the index when the batch verified everything that was left.
            self._track_position(self.current_record_index)

            exchange_html = _notice(
                "🎉 Batch verification complete!",
                f"Marked {success_count} exchanges as correct.",
                "You can now export the results.",
            )
            return (
                exchange_html,
                "Verification Complete",
                self._render_statistics(updated_session),
                self._render_progress_bar(updated_session),
            )
        except Exception as e:
            print(f"Error in batch verification: {e}")
            return f"❌ Error: {_esc(e)}", "Error", "", ""