Spaces:
Running
on
Zero
Running
on
Zero
| """ | |
| Alignment and Visualization Generation | |
| This module provides functions for generating HTML visualizations of phoneme- and word-level | |
| alignment for Quranic recitation evaluation. It aligns predicted and canonical phoneme | |
| sequences, color-codes alignment differences (correct, substitution, insertion, deletion), | |
| renders tables (scrolling or chunked), aggregates results for segmented audio, and produces | |
| interactive UI elements for feedback (including per-segment audio playback and error reports). | |
| Key capabilities: | |
| - Per-segment alignment visualization for segmented audio submissions. | |
| - Word-level and phoneme-level feedback, including error marking and metrics. | |
| - Flexible rendering styles: inline vs. combined tables, collapsible sections, and error | |
| grouping. | |
| - UI elements for reference/user audio, interactive playback, and lazy loading of segments. | |
| - Compatibility with error-analysis rendering (for advanced UI sort/grouping). | |
| - Designed for integration into full-stack Gradio/app interfaces or downstream UI frameworks. | |
| Most functions return HTML or HTML + metrics, designed for direct embedding in frontend UIs. | |
| """ | |
| import sys | |
| from pathlib import Path | |
| # Add parent directory to path for imports | |
| sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| from i8n import t | |
| from config import ( | |
| COLORS, | |
| PHONEME_TABLE_STYLE, | |
| PHONEME_TABLE_CHUNK_SIZE, | |
| SEGMENT_PHONEME_TABLE_LOCATION, | |
| SEGMENT_ERROR_TABLE_LOCATION, | |
| SEGMENT_PHONEME_TABLE_COLLAPSED, | |
| ) | |
| from .metrics import align_sequences, PER_single | |
| from .word_recovery import recover_words, recover_words_for_segment | |
| from recitation_analysis.ui.tooltip import get_tooltip_css | |
| from recitation_analysis.text_display.error_highlighting import render_combined_error_table | |
| # Note: segment audio clips are provided directly by the pipeline. | |
def _generate_segment_divider(segment_num, total_segments, time_range, word_range, is_processing=False, status=None):
    """Build the HTML divider rendered above a segment's feedback.

    Args:
        segment_num: Current segment number (1-based).
        total_segments: Total number of segments.
        time_range: Time range string (e.g., "1.2s - 3.5s"). Kept for API
            compatibility; it is not rendered.
        word_range: Word range string (e.g., "1-4").
        is_processing: If True, return the blinking orange "processing" divider.
        status: 'success', 'error' or 'warning' appends ✅, ❌ or ⚠️ to the
            header text; any other value (including None) appends nothing.
            Not used when is_processing is True.

    Returns:
        HTML string for the divider.
    """
    if not is_processing:
        # Completed segment: blue divider, optionally followed by a status icon.
        icon_by_status = {"success": " ✅", "error": " ❌", "warning": " ⚠️"}
        status_suffix = icon_by_status.get(status, "")
        header_text = t("segments.header_template", n=segment_num, total=total_segments, range=word_range)
        return f'''
        <div style="display: flex; align-items: center; margin: 20px 0 10px 0; direction: ltr;">
            <div style="flex-grow: 1; height: 2px; background: linear-gradient(to right, transparent, var(--primary-500, #3b82f6));"></div>
            <div style="padding: 0 15px; font-size: 13px; color: var(--primary-500, #3b82f6); font-weight: bold;">
                {header_text}{status_suffix}
            </div>
            <div style="flex-grow: 1; height: 2px; background: linear-gradient(to left, transparent, var(--primary-500, #3b82f6));"></div>
        </div>
        '''

    # Pending segment: orange divider with a blink animation, no status icon.
    processing_text = t("segments.processing_template", n=segment_num, total=total_segments, range=word_range)
    return f'''
        <style>
            @keyframes blink-orange {{
                0%, 100% {{ opacity: 1; }}
                50% {{ opacity: 0.3; }}
            }}
        </style>
        <div style="display: flex; align-items: center; margin: 20px 0 10px 0; direction: ltr;">
            <div style="flex-grow: 1; height: 3px; background: linear-gradient(to right, transparent, #fb8c00); animation: blink-orange 1s ease-in-out infinite;"></div>
            <div style="padding: 0 15px; font-size: 13px; color: #fb8c00; font-weight: bold; animation: blink-orange 1s ease-in-out infinite;">
                {processing_text}
            </div>
            <div style="flex-grow: 1; height: 3px; background: linear-gradient(to left, transparent, #fb8c00); animation: blink-orange 1s ease-in-out infinite;"></div>
        </div>
        '''
def _generate_scroll_table(expected_row, actual_row, diff_row):
    """Render the three alignment rows as one horizontally scrolling table.

    Args:
        expected_row: Cell contents for the "expected" row.
        actual_row: Cell contents for the "your recitation" row.
        diff_row: Iterable of (symbol, color) pairs; ``color`` is inserted
            verbatim into the cell's inline style.

    Returns:
        HTML string containing the table.
    """
    # Shared inline CSS for every data cell; the diff row inserts its own
    # color declaration between the two parts.
    cell_css = "padding: 8px; text-align: center; border-right: 1px solid var(--border-color-accent, #f3f4f6); min-width: 40px;"
    text_css = "color: var(--body-text-color, inherit);"

    pieces = [f'''
    <div style="font-family: monospace; font-size: 14px; margin: 10px 0;">
        <div style="border: 1px solid var(--border-color-primary, #e5e7eb); border-radius: 5px; background: var(--background-fill-primary, white); overflow-x: auto;">
            <table style="border-collapse: collapse; white-space: nowrap;">
                <tr style="background-color: var(--table-even-background-fill, #f9fafb); border-bottom: 1px solid var(--border-color-primary, #e5e7eb);">
                    <td style="padding: 8px 12px; font-weight: bold; border-right: 1px solid var(--border-color-primary, #e5e7eb); background-color: var(--table-even-background-fill, #f9fafb); position: sticky; left: 0; z-index: 1; color: var(--body-text-color, inherit);">{t("phoneme_alignment.expected")}</td>
    ''']
    pieces.extend(f'<td style="{cell_css} {text_css}">{cell}</td>' for cell in expected_row)

    pieces.append(f'''
                </tr>
                <tr style="border-bottom: 1px solid var(--border-color-primary, #e5e7eb);">
                    <td style="padding: 8px 12px; font-weight: bold; border-right: 1px solid var(--border-color-primary, #e5e7eb); background-color: var(--background-fill-primary, white); position: sticky; left: 0; z-index: 1; color: var(--body-text-color, inherit);">{t("phoneme_alignment.your_rec")}</td>
    ''')
    pieces.extend(f'<td style="{cell_css} {text_css}">{cell}</td>' for cell in actual_row)

    pieces.append(f'''
                </tr>
                <tr>
                    <td style="padding: 8px 12px; font-weight: bold; border-right: 1px solid var(--border-color-primary, #e5e7eb); background-color: var(--background-fill-primary, white); position: sticky; left: 0; z-index: 1; color: var(--body-text-color, inherit);">{t("phoneme_alignment.diff")}</td>
    ''')
    pieces.extend(
        f'<td style="{cell_css} {highlight} {text_css}">{mark}</td>'
        for mark, highlight in diff_row
    )

    pieces.append('''
                </tr>
            </table>
        </div>
    </div>
    ''')
    return "".join(pieces)
def _generate_chunked_table(expected_row, actual_row, diff_row):
    """Render the alignment rows as a stack of fixed-width tables.

    The rows are cut into consecutive windows of PHONEME_TABLE_CHUNK_SIZE
    columns (sized from ``expected_row``); each window becomes its own
    three-row table. A "Part i of n" caption is added only when there is more
    than one window.

    Args:
        expected_row: Cell contents for the "expected" row.
        actual_row: Cell contents for the "your recitation" row.
        diff_row: Sequence of (symbol, color) pairs; ``color`` is inserted
            verbatim into the cell's inline style.

    Returns:
        HTML string containing all chunk tables inside one wrapper div.
    """
    chunk_size = PHONEME_TABLE_CHUNK_SIZE
    total_columns = len(expected_row)
    num_chunks = (total_columns + chunk_size - 1) // chunk_size  # ceiling division

    cell_css = "padding: 8px; text-align: center; border-right: 1px solid #f1f3f4; white-space: nowrap; min-width: 40px;"
    pieces = ['<div style="font-family: monospace; font-size: 14px; margin: 10px 0;">']

    for part_no in range(1, num_chunks + 1):
        first = (part_no - 1) * chunk_size
        window = slice(first, min(first + chunk_size, total_columns))
        expected_chunk = expected_row[window]
        actual_chunk = actual_row[window]
        diff_chunk = diff_row[window]

        # Caption only makes sense when the table is actually split.
        if num_chunks > 1:
            pieces.append(f'<div style="font-weight: bold; margin-top: 15px; margin-bottom: 5px;">Part {part_no} of {num_chunks}</div>')

        pieces.append('''
        <div style="border: 1px solid var(--border-color-primary, #e5e7eb); border-radius: 5px; background: var(--background-fill-primary, white); margin-bottom: 10px; width: 100%; display: block;">
            <table style="width: 100%; border-collapse: collapse; table-layout: auto; min-width: 100%;">
                <colgroup>
                    <col style="width: 80px;">
        ''')
        # One <col> per data column in this chunk.
        pieces.append('<col style="min-width: 40px; width: auto;">' * len(expected_chunk))

        pieces.append(f'''
                </colgroup>
                <tr style="background-color: var(--table-even-background-fill, #f9fafb); border-bottom: 1px solid var(--border-color-primary, #e5e7eb);">
                    <td style="padding: 8px; font-weight: bold; border-right: 1px solid var(--border-color-primary, #e5e7eb); background-color: var(--table-even-background-fill, #f9fafb); white-space: nowrap; color: var(--body-text-color, inherit);">{t("phoneme_alignment.expected")}</td>
        ''')
        pieces.extend(f'<td style="{cell_css}">{cell}</td>' for cell in expected_chunk)

        pieces.append(f'''</tr>
                <tr style="border-bottom: 1px solid var(--border-color-primary, #e5e7eb);">
                    <td style="padding: 8px; font-weight: bold; border-right: 1px solid var(--border-color-primary, #e5e7eb); background-color: var(--background-fill-primary, white); white-space: nowrap; color: var(--body-text-color, inherit);">{t("phoneme_alignment.your_rec")}</td>''')
        pieces.extend(f'<td style="{cell_css}">{cell}</td>' for cell in actual_chunk)

        pieces.append(f'''</tr>
                <tr>
                    <td style="padding: 8px; font-weight: bold; border-right: 1px solid var(--border-color-primary, #e5e7eb); background-color: var(--background-fill-primary, white); white-space: nowrap; color: var(--body-text-color, inherit);">{t("phoneme_alignment.diff")}</td>''')
        pieces.extend(
            f'<td style="{cell_css} {highlight}">{mark}</td>'
            for mark, highlight in diff_chunk
        )

        pieces.append('''</tr>
            </table>
        </div>
        ''')

    pieces.append('</div>')
    return "".join(pieces)
def create_alignment_visualization(expected_phonemes, actual_phonemes, verse_ref=None, audio_data=None):
    """
    Create an HTML visualization of phoneme alignment, color-coding substitutions, insertions,
    and deletions between expected and actual phoneme sequences.

    - If a verse reference is provided, word-level feedback is included above the phoneme table.
    - Reuses the alignment returned by the word-recovery step when it provides one; otherwise
      the alignment is computed here with align_sequences.
    - Rendering style (scroll or chunked tables) and collapsed state depend on config settings.

    Any exception raised while building the visualization is caught and reported as an
    HTML error box; the exception text is HTML-escaped so characters such as ``<`` or ``&``
    in the message are displayed literally instead of corrupting the markup.

    Args:
        expected_phonemes: Space-separated expected phoneme string
        actual_phonemes: Space-separated actual phoneme string (from user's recitation)
        verse_ref: (Optional) Verse reference for word-level feedback and advanced alignment
        audio_data: (Optional) Tuple of (sample_rate, audio_array) for duration analysis

    Returns:
        Tuple of (html_string, accuracy, expected_count, actual_count)
        html_string: HTML visualization for embedding
        accuracy: Accuracy percentage (100 - PER)
        expected_count: Number of expected phonemes
        actual_count: Number of actual phonemes
        On failure: (error_html, 0, 0, 0).
    """
    # Function-scope import: needed only for escaping the error message below.
    from html import escape as _escape_html

    try:
        # Generate word-level feedback if verse_ref is provided
        word_html = ""
        recitation_result = None
        if verse_ref:
            word_feedback, word_error, recitation_result = recover_words(
                verse_ref, expected_phonemes, actual_phonemes, audio_data
            )
            if word_feedback:
                word_html = word_feedback
            elif word_error:
                # Show error but continue with phoneme alignment.
                # NOTE(review): word_error is inserted as-is; presumably plain text
                # from recover_words -- confirm, and escape it if so.
                word_html = f'<div style="color: var(--body-text-color-subdued, #6b7280); padding: 8px; font-size: 13px;">ℹ️ Word-level feedback unavailable: {word_error}</div>'

        # Tokenize phonemes for alignment and metrics
        expected_tokens = expected_phonemes.split()
        actual_tokens = actual_phonemes.split()

        # Reuse alignment from error pipeline if available (avoids O(n*m) recomputation)
        if recitation_result and recitation_result.phoneme_alignment:
            alignment = list(recitation_result.phoneme_alignment)
        else:
            # Fallback: compute alignment (only when no verse_ref or error occurred)
            alignment = align_sequences(expected_tokens, actual_tokens)

        # Prepare data for 3-row table; entries with an unrecognized op are skipped.
        expected_row = []
        actual_row = []
        diff_row = []
        for ref_tok, hyp_tok, op in alignment:
            if op == "C":  # Correct
                expected_row.append(ref_tok)
                actual_row.append(hyp_tok)
                diff_row.append(("", ""))  # No symbol, no color
            elif op == "S":  # Substitution
                expected_row.append(ref_tok)
                actual_row.append(hyp_tok)
                diff_row.append(("✗", COLORS["substitution"]))
            elif op == "D":  # Deletion
                expected_row.append(ref_tok)
                actual_row.append("—")
                diff_row.append(("−", COLORS["deletion"]))
            elif op == "I":  # Insertion
                expected_row.append("—")
                actual_row.append(hyp_tok)
                diff_row.append(("+", COLORS["insertion"]))

        # Calculate PER.
        # NOTE(review): accuracy goes negative if PER_single can exceed 100 -- confirm its range.
        per_score = PER_single(actual_phonemes, expected_phonemes)
        accuracy = 100 - per_score

        # Generate table based on style setting
        if PHONEME_TABLE_STYLE == "scroll":
            table_html = _generate_scroll_table(expected_row, actual_row, diff_row)
        else:
            table_html = _generate_chunked_table(expected_row, actual_row, diff_row)

        # Wrap in collapsible section (respects config setting)
        # Use ltr-preserve class on table only - header should follow RTL direction for Arabic text
        details_open = "" if SEGMENT_PHONEME_TABLE_COLLAPSED else "open"
        phoneme_html = f'''
        <div class="phoneme-alignment">
            <details {details_open} style="margin: 10px 0;">
                <summary style="cursor: pointer; font-weight: bold; font-size: 14px; padding: 8px; background-color: var(--background-fill-secondary, #f3f4f6); border-radius: 4px; user-select: none; color: var(--body-text-color, inherit);">
                    {t("phoneme_alignment.header")}
                </summary>
                <div class="ltr-preserve">
                    {table_html}
                </div>
            </details>
        </div>
        '''

        # Prepend word-level feedback before phoneme alignment
        full_html = word_html + phoneme_html
        return full_html, accuracy, len(expected_tokens), len(actual_tokens)
    except Exception as e:
        # UI boundary: report the failure in the page rather than raising.
        # Escape the message -- exception text routinely contains '<', '>' or '&'
        # (e.g. "'<' not supported ..." or object reprs), which would otherwise
        # be parsed as markup.
        error_html = f'<div style="color: red; padding: 10px;">Error creating alignment: {_escape_html(str(e))}</div>'
        return error_html, 0, 0, 0
def format_metrics_html(accuracy, expected_count, actual_count):
    """
    Render the three headline metrics (expected count, predicted count, accuracy)
    as a row of tiles for embedding next to an alignment visualization.

    The accuracy value is shown with one decimal place and colored green at 90 and
    above, amber from 70 up to (but not including) 90, and red below 70.

    Args:
        accuracy: Accuracy percentage
        expected_count: Number of expected phonemes
        actual_count: Number of actual phonemes

    Returns:
        HTML string with formatted metrics
    """
    # Pick the accuracy color from the threshold band.
    if accuracy >= 90:
        accuracy_color = '#10b981'
    elif accuracy >= 70:
        accuracy_color = '#f59e0b'
    else:
        accuracy_color = '#ef4444'
    count_color = 'var(--primary-500, #3b82f6)'

    def tile(label, value, value_color):
        # One centered tile: small caption above a large bold value.
        return (
            '        <div style="text-align: center;">\n'
            f'            <div style="font-size: 11px; color: var(--body-text-color-subdued, #6b7280);">{label}</div>\n'
            f'            <div style="font-size: 20px; font-weight: bold; color: {value_color};">{value}</div>\n'
            '        </div>\n'
        )

    tiles = (
        tile('Expected', f'{expected_count}', count_color),
        tile('Predicted', f'{actual_count}', count_color),
        tile('Accuracy', f'{accuracy:.1f}%', accuracy_color),
    )
    return (
        '\n    <div style="display: flex; justify-content: space-around; padding: 12px; background-color: var(--background-fill-secondary, #f3f4f6); border-radius: 5px; margin: 10px 0; border: 1px solid var(--border-color-primary, #e5e7eb);">\n'
        + ''.join(tiles)
        + '    </div>\n    '
    )
| def create_segmented_alignment_visualization( | |
| segments, | |
| predicted_phonemes_per_segment, | |
| verse_ref, | |
| canonical_text, | |
| coverage_warning=None, | |
| reference_audio_clips=None, | |
| user_audio_clips=None, | |
| ): | |
| """ | |
| Create a rich HTML visualization for segmented audio recitation, | |
| generating per-segment feedback boxes with audio controls, | |
| word-level and phoneme-level alignments, and error summaries. | |
| - For each segment: | |
| - Displays audio controls (reference and user recordings). | |
| - Highlights word-level feedback and phoneme alignment table. | |
| - Marks errors, pending states, and disables alignment if segment is missing or errored. | |
| - Supports lazy loading of reference audio and playback for each segment. | |
| - Handles both inline and end-of-block error/phoneme tables based on configuration. | |
| - After all segments, optionally appends combined error and phoneme tables. | |
| Args: | |
| segments: List of SegmentInfo objects from segment_processor (audio/time/word boundaries) | |
| predicted_phonemes_per_segment: List of predicted phoneme strings for each segment (may include None for pending) | |
| verse_ref: Full verse reference string | |
| canonical_text: Full canonical Arabic text for the verse | |
| coverage_warning: (Optional) Warning about incomplete segment/audio coverage (shown at top) | |
| reference_audio_clips: (Optional) List of data URIs for reference audio per segment | |
| user_audio_clips: (Optional) List of data URIs for user's recorded audio per segment | |
| Returns: | |
| Tuple of: | |
| html_string: HTML for embedding in app UI | |
| overall_accuracy: Accuracy percentage aggregated across all segments | |
| total_expected: Total expected phoneme count over all segments | |
| total_actual: Actual predicted phoneme count over all segments | |
| all_results: List[RecitationResult or None] for each segment | |
| """ | |
| if not segments: | |
| return '<div style="color: var(--body-text-color-subdued, #6b7280); padding: 10px;">No segments detected</div>', 0, 0, 0, [] | |
| total_segments = len(segments) | |
| segment_html_parts = [] # Word feedback per segment | |
| all_errors = [] # Collect errors from all segments | |
| all_results = [] # Collect RecitationResult objects for unified table | |
| phoneme_tables = [] # Collect phoneme tables for all segments | |
| segment_metadata = [] # Per-segment data for re-rendering with sort modes | |
| total_expected = 0 | |
| total_actual = 0 | |
| total_correct = 0 | |
| # Add coverage warning if present | |
| if coverage_warning: | |
| # Ensure tooltip CSS is present (segment mode builds multiple blocks). | |
| segment_html_parts.append(get_tooltip_css()) | |
| segment_html_parts.append(f''' | |
| <div style="padding: 10px 15px; background-color: var(--warning-background-fill, #fef3c7); border: 1px solid var(--warning-border-color, #f59e0b); | |
| border-radius: 5px; margin-bottom: 15px; color: var(--warning-text-color, #92400e);"> | |
| {coverage_warning} | |
| </div> | |
| ''') | |
| else: | |
| # Ensure tooltip CSS is present even when no warning. | |
| segment_html_parts.append(get_tooltip_css()) | |
| # Process each segment | |
| for seg_idx, (segment, predicted_phonemes) in enumerate(zip(segments, predicted_phonemes_per_segment)): | |
| seg_num = seg_idx + 1 | |
| # Time range string | |
| time_range = f"{segment.start_time:.1f}s - {segment.end_time:.1f}s" | |
| # Word range string (1-based for display) | |
| word_range = f"{segment.word_start_idx + 1}-{segment.word_end_idx + 1}" | |
| # Check if this segment is currently being processed (None = pending) | |
| is_processing = predicted_phonemes is None and not segment.error | |
| # Handle segment errors | |
| if segment.error: | |
| # Add divider with error status | |
| divider = _generate_segment_divider(seg_num, total_segments, time_range, word_range, is_processing=False, status="error") | |
| segment_html_parts.append(divider) | |
| segment_html_parts.append(f''' | |
| <div style="padding: 10px; background-color: var(--error-background-fill, #fee2e2); border: 1px solid var(--error-border-color, #ef4444); | |
| border-radius: 5px; color: var(--error-text-color, #991b1b); margin: 10px 0;"> | |
| ⚠️ Segment {seg_num}: {segment.error} | |
| </div> | |
| ''') | |
| all_results.append(None) # Maintain 1:1 correspondence with segments | |
| continue | |
| # Check for pending status (None) | |
| if predicted_phonemes is None: | |
| # Add blinking divider for processing | |
| divider = _generate_segment_divider(seg_num, total_segments, time_range, word_range, is_processing=True) | |
| segment_html_parts.append(divider) | |
| segment_html_parts.append(f''' | |
| <div style="padding: 20px; background-color: rgba(251, 140, 0, 0.1); border: 2px dashed #fb8c00; | |
| border-radius: 8px; color: #fb8c00; margin: 10px 0; text-align: center;"> | |
| <div style="display: inline-block; width: 20px; height: 20px; border: 3px solid rgba(251, 140, 0, 0.3); border-top: 3px solid #fb8c00; border-radius: 50%; animation: spin 1s linear infinite; margin-right: 10px; vertical-align: middle;"></div> | |
| <style> | |
| @keyframes spin {{ | |
| 0% {{ transform: rotate(0deg); }} | |
| 100% {{ transform: rotate(360deg); }} | |
| }} | |
| </style> | |
| <span style="font-weight: 600;">Processing segment {seg_num}...</span> | |
| </div> | |
| ''') | |
| all_results.append(None) # Maintain 1:1 correspondence with segments | |
| continue | |
| if not predicted_phonemes: | |
| divider = _generate_segment_divider(seg_num, total_segments, time_range, word_range, is_processing=False, status="warning") | |
| segment_html_parts.append(divider) | |
| # Show matched verse info if available, even without phoneme alignment | |
| verse_info = f" ({segment.matched_ref})" if segment.matched_ref else "" | |
| text_info = f"<br><span style='font-size: 0.9em;'>{segment.matched_text}</span>" if segment.matched_text else "" | |
| segment_html_parts.append(f''' | |
| <div style="padding: 12px; background-color: rgba(251, 140, 0, 0.1); border: 1px solid #fb8c00; | |
| border-radius: 6px; color: var(--body-text-color, #374151); margin: 10px 0;"> | |
| <div style="font-weight: 600; color: #fb8c00; margin-bottom: 4px;"> | |
| ⚠️ Segment {seg_num}{verse_info}: Phoneme transcription failed | |
| </div> | |
| <div style="font-size: 0.85em; color: var(--body-text-color-subdued, #6b7280);"> | |
| Audio segment processed but phoneme extraction returned empty. Check logs for details. | |
| </div>{text_info} | |
| </div> | |
| ''') | |
| all_results.append(None) # Maintain 1:1 correspondence with segments | |
| continue | |
| # Collect segment content first to determine status | |
| segment_content_parts = [] | |
| segment_has_errors = False | |
| # Audio section: reference reciter (left) and user audio (right) in 2-column layout | |
| ref_clip = reference_audio_clips[seg_idx] if reference_audio_clips and seg_idx < len(reference_audio_clips) else None | |
| user_clip = user_audio_clips[seg_idx] if user_audio_clips and seg_idx < len(user_audio_clips) else None | |
| # Build segment info JSON for lazy loading (used when ref_clip is None) | |
| import json | |
| segment_info_json = json.dumps({ | |
| "verse_ref": verse_ref, | |
| "word_start": segment.word_start_idx, | |
| "word_end": segment.word_end_idx | |
| }).replace('"', '"') | |
| # Always show audio section if we have user clip OR segment info for lazy loading | |
| has_segment_info = segment.word_start_idx is not None and segment.word_end_idx is not None | |
| if ref_clip or user_clip or has_segment_info: | |
| segment_content_parts.append('<div style="display: flex; gap: 10px; margin: 10px 0;">') | |
| # {t("segments.reference_audio")} audio (left column) - either embedded or lazy load | |
| if ref_clip: | |
| # Pre-loaded audio clip | |
| segment_content_parts.append(f''' | |
| <div style="flex: 1; padding: 10px 12px; border: 1px solid var(--border-color-accent, #e5e7eb); border-radius: 10px; background: var(--background-fill-secondary, #f3f4f6);"> | |
| <div style="font-weight: 700; font-size: 13px; margin-bottom: 6px; color: var(--body-text-color, inherit); text-align: center;">{t("segments.reference_audio")}</div> | |
| <audio controls style="width: 100%;"> | |
| <source src="{ref_clip}" type="audio/wav"> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </div> | |
| ''') | |
| elif has_segment_info: | |
| # Lazy load placeholder - click to load | |
| segment_content_parts.append(f''' | |
| <div id="ref-clip-{seg_idx}" class="ref-clip-container" style="flex: 1; padding: 10px 12px; border: 1px solid var(--border-color-accent, #e5e7eb); border-radius: 10px; background: var(--background-fill-secondary, #f3f4f6);" | |
| data-segment="{segment_info_json}"> | |
| <div style="font-weight: 700; font-size: 13px; margin-bottom: 6px; color: var(--body-text-color, inherit); text-align: center;">{t("segments.reference_audio")}</div> | |
| <div class="ref-clip-placeholder" style="display: flex; align-items: center; gap: 8px;"> | |
| <button onclick="loadSegmentClip({seg_idx})" style="padding: 6px 12px; background: var(--button-primary-background-fill, #2563eb); color: white; border: none; border-radius: 6px; cursor: pointer; font-size: 13px;"> | |
| ▶ Load Audio | |
| </button> | |
| <span style="font-size: 12px; color: var(--body-text-color-subdued, #666);">Click to load reference</span> | |
| </div> | |
| <audio controls style="width: 100%; display: none;"> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </div> | |
| ''') | |
| # User's recorded audio (right column) | |
| if user_clip: | |
| segment_content_parts.append(f''' | |
| <div style="flex: 1; padding: 10px 12px; border: 1px solid var(--border-color-accent, #e5e7eb); border-radius: 10px; background: var(--background-fill-secondary, #f3f4f6);"> | |
| <div style="font-weight: 700; font-size: 13px; margin-bottom: 6px; color: var(--body-text-color, inherit); text-align: center;">{t("segments.user_audio")}</div> | |
| <audio controls style="width: 100%;"> | |
| <source src="{user_clip}" type="audio/wav"> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </div> | |
| ''') | |
| segment_content_parts.append('</div>') | |
| # Word-level feedback for this segment (segment treated as stopping at end). | |
| segment_ref = segment.matched_ref if hasattr(segment, 'matched_ref') and segment.matched_ref else None | |
| word_html, segment_errors, resolved_canonical_phonemes, segment_result = recover_words_for_segment( | |
| verse_ref, | |
| predicted_phonemes, | |
| word_start_idx=segment.word_start_idx, | |
| word_end_idx=segment.word_end_idx, | |
| segment_ref=segment_ref, | |
| ) | |
| if word_html: | |
| segment_content_parts.append(word_html) | |
| if segment_errors: | |
| segment_has_errors = True | |
| all_errors.extend(segment_errors) | |
| # Collect RecitationResult for unified table rendering | |
| # Always append to maintain 1:1 correspondence with segments (can be None) | |
| all_results.append(segment_result) | |
| # Render inline error table if configured (no title for inline) | |
| if segment_result and SEGMENT_ERROR_TABLE_LOCATION == "inline": | |
| inline_error_table = render_combined_error_table( | |
| [segment_result], title="" | |
| ) | |
| if inline_error_table: | |
| segment_content_parts.append(inline_error_table) | |
| # Prefer the separately-phonemized canonical phonemes (segment treated as stopping); | |
| # fall back to the segment processor output if needed. | |
| segment_canonical_phonemes = resolved_canonical_phonemes or segment.canonical_phonemes | |
| if not segment_canonical_phonemes: | |
| segment_content_parts.append(f''' | |
| <div style="padding: 10px; color: var(--body-text-color-subdued, #6b7280); margin: 10px 0;"> | |
| Segment {seg_num}: No canonical phonemes available | |
| </div> | |
| ''') | |
| # Still add divider and wrapped content box | |
| status = "error" if segment_has_errors else "success" | |
| divider = _generate_segment_divider(seg_num, total_segments, time_range, word_range, is_processing=False, status=status) | |
| segment_html_parts.append(divider) | |
| segment_box = f''' | |
| <div style="border: 1px solid var(--border-color-primary, #e5e7eb); border-radius: 8px; padding: 16px; margin-bottom: 12px; background-color: var(--background-fill-secondary, #f9fafb);"> | |
| {''.join(segment_content_parts)} | |
| </div> | |
| ''' | |
| segment_html_parts.append(segment_box) | |
| continue | |
| # Create phoneme alignment for this segment | |
| try: | |
| # Reuse alignment from segment_result if available (avoids O(n*m) recomputation) | |
| if segment_result and segment_result.phoneme_alignment: | |
| alignment = list(segment_result.phoneme_alignment) | |
| expected_tokens = segment_canonical_phonemes.split() | |
| actual_tokens = predicted_phonemes.split() | |
| else: | |
| # Fallback: compute alignment | |
| expected_tokens = segment_canonical_phonemes.split() | |
| actual_tokens = predicted_phonemes.split() | |
| alignment = align_sequences(expected_tokens, actual_tokens) | |
| expected_row = [] | |
| actual_row = [] | |
| diff_row = [] | |
| correct_count = 0 | |
| for ref_tok, hyp_tok, op in alignment: | |
| if op == "C": | |
| expected_row.append(ref_tok) | |
| actual_row.append(hyp_tok) | |
| diff_row.append(("", "")) | |
| correct_count += 1 | |
| elif op == "S": | |
| expected_row.append(ref_tok) | |
| actual_row.append(hyp_tok) | |
| diff_row.append(("✗", COLORS["substitution"])) | |
| elif op == "D": | |
| expected_row.append(ref_tok) | |
| actual_row.append("—") | |
| diff_row.append(("−", COLORS["deletion"])) | |
| elif op == "I": | |
| expected_row.append("—") | |
| actual_row.append(hyp_tok) | |
| diff_row.append(("+", COLORS["insertion"])) | |
| total_expected += len(expected_tokens) | |
| total_actual += len(actual_tokens) | |
| total_correct += correct_count | |
| segment_accuracy = (correct_count / len(expected_tokens) * 100) if expected_tokens else 0 | |
| # Render inline or store for combined display at end | |
| if SEGMENT_PHONEME_TABLE_LOCATION == "inline": | |
| # Generate table | |
| if PHONEME_TABLE_STYLE == "scroll": | |
| table_html = _generate_scroll_table(expected_row, actual_row, diff_row) | |
| else: | |
| table_html = _generate_chunked_table(expected_row, actual_row, diff_row) | |
| # If collapsed mode, wrap in collapsible details; otherwise show directly | |
| if SEGMENT_PHONEME_TABLE_COLLAPSED: | |
| inline_phoneme_html = f''' | |
| <details style="margin: 10px 0;"> | |
| <summary style="cursor: pointer; font-weight: bold; font-size: 13px; padding: 6px; | |
| background-color: var(--background-fill-secondary, #f3f4f6); border-radius: 4px; user-select: none; color: var(--body-text-color, inherit);"> | |
| {t("phoneme_alignment.header")} | |
| </summary> | |
| <div style="margin-top: 8px;"> | |
| {table_html} | |
| </div> | |
| </details> | |
| ''' | |
| else: | |
| # Show directly without collapsible wrapper | |
| inline_phoneme_html = f''' | |
| <div style="margin: 10px 0;"> | |
| {table_html} | |
| </div> | |
| ''' | |
| # Add to segment content (inside the box) | |
| segment_content_parts.append(inline_phoneme_html) | |
| else: | |
| # Store phoneme table data for combined display at end | |
| phoneme_tables.append({ | |
| 'seg_num': seg_num, | |
| 'time_range': time_range, | |
| 'word_range': word_range, | |
| 'expected_row': expected_row, | |
| 'actual_row': actual_row, | |
| 'diff_row': diff_row, | |
| 'accuracy': segment_accuracy, | |
| 'has_errors': segment_has_errors, | |
| }) | |
| except Exception as e: | |
| segment_content_parts.append(f''' | |
| <div style="color: var(--error-text-color, #ef4444); padding: 10px;"> | |
| Error creating alignment for segment {seg_num}: {str(e)} | |
| </div> | |
| ''') | |
| # Now add divider with status based on whether segment has errors | |
| status = "error" if segment_has_errors else "success" | |
| divider = _generate_segment_divider(seg_num, total_segments, time_range, word_range, is_processing=False, status=status) | |
| segment_html_parts.append(divider) | |
| # Wrap all segment content in a single box | |
| segment_box = f''' | |
| <div style="border: 1px solid var(--border-color-primary, #e5e7eb); border-radius: 8px; padding: 16px; margin-bottom: 12px; background-color: var(--background-fill-secondary, #f9fafb);"> | |
| {''.join(segment_content_parts)} | |
| </div> | |
| ''' | |
| segment_html_parts.append(segment_box) | |
| # Count errors by handler type for summary table | |
| error_counts = {} | |
| if segment_result and segment_result.errors: | |
| for error in segment_result.errors: | |
| handler = getattr(error, 'source_handler', '') or '' | |
| # Handle multiple handlers (e.g., "Handler1+Handler2") | |
| for part in handler.split('+'): | |
| # Extract handler name (before ":") | |
| handler_name = part.split(':')[0].strip() if ':' in part else part.strip() | |
| if handler_name: | |
| error_counts[handler_name] = error_counts.get(handler_name, 0) + 1 | |
| # Track segment data for re-rendering with sort modes | |
| segment_metadata.append({ | |
| 'segment_idx': seg_idx, | |
| 'divider_html': divider, | |
| 'segment_html': segment_box, | |
| 'has_errors': segment_has_errors, | |
| 'segment_num': seg_num, | |
| 'error_counts': error_counts, | |
| }) | |
| # Build final HTML: segments feedback -> combined errors (final) -> phoneme tables | |
| all_html_parts = segment_html_parts.copy() | |
| # Add combined error table once all segments are processed (skip if only 1 segment) | |
| # Only render at end if configured for "end" mode (inline mode renders per-segment) | |
| if SEGMENT_ERROR_TABLE_LOCATION == "end": | |
| if all(p is not None for p in predicted_phonemes_per_segment) and total_segments > 1: | |
| combined_table = render_combined_error_table(all_results, title="All Errors") | |
| if combined_table: | |
| all_html_parts.append(combined_table) | |
| # Add combined phoneme alignment tables at the end (only populated in "end" mode) | |
| if phoneme_tables: | |
| details_open = "" if SEGMENT_PHONEME_TABLE_COLLAPSED else "open" | |
| phoneme_html = f''' | |
| <div class="phoneme-alignment"> | |
| <details {details_open} style="margin: 15px 0;"> | |
| <summary style="cursor: pointer; font-weight: bold; font-size: 14px; padding: 8px; | |
| background-color: var(--background-fill-secondary, #f3f4f6); border-radius: 4px; user-select: none; color: var(--body-text-color, inherit);"> | |
| {t("phoneme_alignment.header")} | |
| </summary> | |
| <div class="ltr-preserve" style="margin-top: 10px;"> | |
| ''' | |
| for table_data in phoneme_tables: | |
| # Add mini divider for each segment's phoneme table with status indicator | |
| status_icon = " ❌" if table_data.get('has_errors') else " ✅" | |
| phoneme_html += f''' | |
| <div style="display: flex; align-items: center; margin: 15px 0 5px 0; direction: ltr;"> | |
| <div style="flex-grow: 1; height: 1px; background: var(--border-color-primary, #e5e7eb);"></div> | |
| <div style="padding: 0 10px; font-size: 12px; color: var(--body-text-color-subdued, #6b7280);"> | |
| Segment {table_data['seg_num']}{status_icon} | |
| </div> | |
| <div style="flex-grow: 1; height: 1px; background: var(--border-color-primary, #e5e7eb);"></div> | |
| </div> | |
| ''' | |
| # Generate table | |
| if PHONEME_TABLE_STYLE == "scroll": | |
| table_html = _generate_scroll_table( | |
| table_data['expected_row'], | |
| table_data['actual_row'], | |
| table_data['diff_row'] | |
| ) | |
| else: | |
| table_html = _generate_chunked_table( | |
| table_data['expected_row'], | |
| table_data['actual_row'], | |
| table_data['diff_row'] | |
| ) | |
| phoneme_html += table_html | |
| phoneme_html += ''' | |
| </div> | |
| </details> | |
| </div> | |
| ''' | |
| all_html_parts.append(phoneme_html) | |
| # Calculate overall accuracy | |
| overall_accuracy = (total_correct / total_expected * 100) if total_expected > 0 else 0 | |
| # Cache segment metadata for re-rendering (only when all segments are processed) | |
| # Check that we have metadata for all segments (no pending/error segments skipped) | |
| if len(segment_metadata) == total_segments and all(p is not None for p in predicted_phonemes_per_segment): | |
| from shared_state import set_last_error_segment_data | |
| set_last_error_segment_data(segment_metadata) | |
| full_html = ''.join(all_html_parts) | |
| return full_html, overall_accuracy, total_expected, total_actual, all_results | |